@Preamble{
    "\hyphenation{ }"
  # "\ifx \undefined \circled \def \circled #1{(#1)} \fi"
  # "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
  # "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
@String{j-COMP-ARCH-NEWS          = "ACM SIGARCH Computer Architecture News"}
@String{pub-ACM                   = "ACM Press"}
@String{pub-ACM:adr               = "New York, NY 10036, USA"}
@String{pub-IEEE                  = "IEEE Computer Society Press"}
@String{pub-IEEE:adr              = "1109 Spring Street, Suite 300,
                                     Silver Spring, MD 20910, USA"}
@String{pub-MORGAN-KAUFMANN       = "Morgan Kaufmann Publishers"}
@String{pub-MORGAN-KAUFMANN:adrsf = "San Francisco, CA, USA"}
@Article{Foster:1972:RDM,
  author =       "Caxton C. Foster",
  title =        "A review of dynamic memories with enhanced data access
                 by {Harold S. Stone. IEEETC Vol. C-21, \#4, p 359--386,
                 April 1972}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "2",
  pages =        "3--7",
  month =        apr,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:38 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bataille:1972:SOG,
  author =       "M. Bataille",
  title =        "Something old: the {Gamma 60} the computer that was
                 ahead of its time",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "2",
  pages =        "10--15",
  month =        apr,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:38 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Foster:1972:SNI,
  author =       "Caxton C. Foster",
  title =        "Something new: the {Intel MCS-4} micro computer set",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "2",
  pages =        "16--17",
  month =        apr,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:38 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1972:MNC,
  author =       "John A. N. Lee",
  title =        "My next compiler",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "2",
  pages =        "17--19",
  month =        apr,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:38 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Flynn:1972:CAJ,
  author =       "Michael J. Flynn and Carol Rogers",
  title =        "Computer architecture at {Johns Hopkins}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "2",
  pages =        "21--33",
  month =        apr,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:38 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Vaughan:1972:CAS,
  author =       "R. F. Vaughan and R. A. Collins",
  title =        "On computer architecture, software portability \&
                 microprogramming",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "4",
  pages =        "14--15",
  month =        oct,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Brakefield:1972:OFP,
  author =       "James C. Brakefield",
  title =        "An optimal floating point format",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "4",
  pages =        "16--17",
  month =        oct,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Brewer:1972:RDD,
  author =       "J. E. Brewer",
  title =        "Recent doctoral dissertations of interest to
                 {SIGARCH}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "1",
  number =       "4",
  pages =        "18--20",
  month =        oct,
  year =         "1972",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bettcher:1973:TSR,
  author =       "C. W. Bettcher",
  title =        "Thread standardization and relative cost",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "1",
  pages =        "9--9",
  month =        jan,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:28 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "This is a reprint of an article published in the {\em
                 Journal of the Society of Automotive Engineers}, Volume
                 XVIII, Number 2, p. 131, February 1926, about the cost
                 of the lack of standardization of screw threads. {\em
                 Computer Architecture News\/} Editor-in-Chief Caxton C.
                 Foster has added a hand-written note ``of course, there
                 is no message here for {\em us}.''",
}
@Article{Sites:1973:FPS,
  author =       "Richard L. Sites",
  title =        "Floating point significance interrupt proposal",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "1",
  pages =        "10--12",
  month =        jan,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:28 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The purpose of this proposal is to aid numerical
                 analysts in observing the significance of results in
                 floating-point calculations. This proposal is not a
                 cure-all, but it does attempt to a first, high-payoff
                 step in understanding and analyzing floating-point
                 algorithms. This proposal is specifically for IBM
                 360/370 architecture, but the ideas are applicable to
                 all machines.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "The author observes that register clearing by
                 subtraction is common, and is one of the reasons that
                 ``all IBM language processors execute with significance
                 masked off.'' He proposes suppressing the significance
                 interrupt in subtractions when both operands are
                 equal.",
}
@Article{Foster:1973:CA,
  author =       "Caxton C. Foster",
  title =        "Computer architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "1",
  pages =        "13--18",
  month =        jan,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:28 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Adler:1973:MCC,
  author =       "Louis S. Adler",
  title =        "A mini-computer configuration for {CAI}: a systems
                 engineering view",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "3",
  pages =        "10--19",
  month =        oct,
  year =         "1973",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1216456.1216457",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jul 6 14:31:17 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Computer assisted instruction (CAI) has not impacted
                 the educational world with the degree of success which
                 early proponents predicted. Although CAI has proven to
                 be a more efficient learning tool than common
                 traditional methods in specific instances, the overall
                 success of such systems has been sporadic. There is no
                 question that a well-designed and correctly implemented
                 CAI system can be highly effective; however, several
                 important factors must be overcome to guarantee a
                 reasonable amount of success. These are:\par
                 * Overcoming the present high cost of hardware while
                 still providing a reliable system having acceptable
                 display capability.\par
                 * Developing a software real-time operating system
                 which guarantees fast response times.\par
                 * Authoring high quality courseware.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Gentleman:1973:TC,
  author =       "W. M. Gentleman and B. A. Wichmann",
  title =        "Timing on computers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "3",
  pages =        "20--23",
  month =        oct,
  year =         "1973",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1216456.1216458",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jul 6 14:31:17 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  URL =          "ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Misc/monitor.bib",
  abstract =     "Most computers today provide some form of clock which
                 can be read by software. The purpose of this note is to
                 illustrate why in many existing systems, the facilities
                 offered are inadequate for ordinary programmers.
                 Proposals are made for changes in both hardware and
                 software to remedy these deficiencies.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Schank:1973:AAS,
  author =       "Karl Schank",
  title =        "Architectural assistance to software debugging aids",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "3",
  pages =        "37--38",
  month =        oct,
  year =         "1973",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1216456.1216459",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jul 6 14:31:17 MDT 2010",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "It has been observed [1] that 45 to 50\% of
                 programming effort is spent in debugging, checkout and
                 testing, yet the architecture of most modern computer
                 systems does little if anything to facilitate ease of
                 debugging. In most batch systems the programmer is
                 sufficiently removed from the execution of his program
                 as to be severely handicapped in diagnosing errors.
                 There is only so much information that can be easily
                 obtained from a voluminous core dump, for instance.
                 Even programmers on large timesharing systems have
                 available at most an interactive software debugging
                 package which operates through a combination of
                 insertions and replacements of object code and
                 interpretation (rather than execution) of machine code.
                 This can get to be quite inefficient when carried to
                 the extreme and often is useful only if the program has
                 been processed by a special compiler.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bhandarkar:1973:MCM,
  author =       "Dileep P. Bhandarkar and Samuel H. Fuller",
  title =        "{Markov} chain models for analyzing memory
                 interference in multiprocessor computer systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "1--6",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Anderson:1973:IDP,
  author =       "George A. Anderson",
  title =        "Interconnecting a distributed processor system for
                 avionics",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "11--16",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Goke:1973:BNP,
  author =       "L. Rodney Goke and G. J. Lipovski",
  title =        "{Banyan} networks for partitioning multiprocessor
                 systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "21--28",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Jordan:1973:SDS,
  author =       "Harry F. Jordan and Burton J. Smith",
  title =        "Structure of digital system description languages",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "31--34",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1973:VDS,
  author =       "John A. N. Lee",
  title =        "{VDL}---a definition system for all levels",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "41--48",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Radoy:1973:MPP,
  author =       "Charles H. Radoy and George P. {Copeland, Jr.} and G.
                 J. Lipovski",
  title =        "A methodology for parallel processing design
                 tradeoffs",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "51--56",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Reddaway:1973:DDA,
  author =       "S. F. Reddaway",
  title =        "{DAP}---a distributed array processor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "61--65",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kogge:1973:MRP,
  author =       "Peter M. Kogge",
  title =        "Maximal rate pipelined solutions to recurrence
                 problems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "71--76",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Agerwala:1973:CCL,
  author =       "Tilak Agerwala and Mike Flynn",
  title =        "Comments on capabilities, limitations and
                 ``correctness'' of {Petri} nets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "81--86",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Omohundro:1973:FFC,
  author =       "Wayne E. Omohundro and James H. Tracey",
  title =        "{Flowware}---a flow charting procedure to describe
                 digital networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "91--97",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Barbacci:1973:AED,
  author =       "Mario R. Barbacci and Daniel P. Siewiorek",
  title =        "Automated exploration of the design space for register
                 transfer {(RT)} systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "101--106",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Laliotis:1973:IAS,
  author =       "T. A. Laliotis",
  title =        "Implementation aspects of the symbol hardware
                 compiler",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "111--115",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Copeland:1973:ACC,
  author =       "George P. {Copeland, Jr.} and G. J. Lipovski and
                 Stanley Y. W. Su",
  title =        "The architecture of {CASSM}: a cellular system for
                 non-numeric processing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "121--128",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Hemphill:1973:DDG,
  author =       "John M. Hemphill and S. A. Szygenda",
  title =        "Deriving design guidelines for diagnosable computer
                 systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "131--135",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Parhami:1973:DFT,
  author =       "Behrooz Parhami and Algirdas Avizienis",
  title =        "Design of fault-tolerant associative processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "141--145",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fischler:1973:FTM,
  author =       "M. A. Fischler and O. Firschein",
  title =        "A fault tolerant multiprocessor architecture for
                 real-time control applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "151--157",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1973:VFS,
  author =       "G. J. Lipovski",
  title =        "A varistructured fail-soft cellular computer",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "161--165",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Vaucher:1973:HLC,
  author =       "Jean Vaucher and Christian Rey",
  title =        "A hardware laboratory for computer architecture
                 research",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "171--175",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Knoke:1973:SEC,
  author =       "P. J. Knoke",
  title =        "Simulation exercises for computer architecture
                 education",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "181--185",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sloan:1973:CAC,
  author =       "M. E. Sloan",
  title =        "Computer architecture courses in electrical
                 engineering departments",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "191--195",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Hartenstein:1973:IHC,
  author =       "Reiner W. Hartenstein",
  title =        "Increasing hardware complexity---a challenge to
                 computer architecture education",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "201--206",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Rossmann:1973:RWC,
  author =       "George Rossmann",
  title =        "Review of the {{\em Workshop on Computer Architecture
                 Education}}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "211--214",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Cooper:1973:MMB,
  author =       "Richard G. Cooper",
  title =        "{Micromodules}: Microprogrammable building blocks for
                 hardware development",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "221--226",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fuller:1973:CMA,
  author =       "Samuel H. Fuller and Daniel P. Siewiorek and R. J.
                 Swan",
  title =        "{Computer Modules}: an architecture for large digital
                 modules",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "231--237",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Zaks:1973:MAF,
  author =       "Rodnay Zaks",
  title =        "A microprogrammed architecture for front end
                 processing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "241--246",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Vranesic:1973:DFV,
  author =       "Z. G. Vranesic and V. C. Hamacher and Y. Y. Leung",
  title =        "Design of a fully variable-length structured
                 minicomputer",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "251--255",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Marvel:1973:HHA,
  author =       "Orin E. Marvel",
  title =        "{HAPPE} {Honeywell Associative Parallel Processing
                 Ensemble}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "261--267",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Schaffner:1973:CAP,
  author =       "Mario R. Schaffner",
  title =        "A computer architecture and its programming language",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "2",
  number =       "4",
  pages =        "271--277",
  month =        dec,
  year =         "1973",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Shore:1974:CCa,
  author =       "John Shore",
  title =        "Conjecture corner",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "1",
  pages =        "3--6",
  month =        mar,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:29 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{McKeeman:1974:CDE,
  author =       "W. M. McKeeman",
  title =        "Computer design evaluation using programming language
                 primitives",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "1",
  pages =        "7--18",
  month =        mar,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:29 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Hartenstein:1974:LMI,
  author =       "Reiner W. Hartenstein",
  title =        "Letter to membership from incoming chairman {(CAN,
                 Oct. 73)}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "1",
  pages =        "19--22",
  month =        mar,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:29 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Stryker:1974:SSA,
  author =       "David Stryker and David Weiss",
  title =        "Secure system architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "2",
  pages =        "37--38",
  month =        jun,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:41 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Su:1974:BRL,
  author =       "Stephen Y. H. Su",
  title =        "Book review of {{\em Logic and Logic Design\/}} by {B.
                 Girling and H. G. Morning. International Textbook
                 Company Limited 1973}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "3",
  pages =        "2--3",
  month =        sep,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:02 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Shore:1974:CCb,
  author =       "John Shore",
  title =        "Conjecture corner",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "3",
  pages =        "4--9",
  month =        sep,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:02 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Nisnevich:1974:DPC,
  author =       "L. Nisnevich and E. Strasbourger",
  title =        "Decentralized priority control in data communication",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "4",
  pages =        "1--6",
  month =        dec,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Reames:1974:LNS,
  author =       "Cecil C. Reames and Ming T. Liu",
  title =        "A loop network for simultaneous transmission of
                 variable-length messages",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "3",
  number =       "4",
  pages =        "7--12",
  month =        dec,
  year =         "1974",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:04 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Callan:1974:APS,
  author          = "James F. Callan",
  title           = "The architecture of the {Picture System}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "13--16",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  keywords        = "Evans \& Sutherland Picture System",
}
@Article{Staudhammer:1974:FDO,
  author          = "John Staudhammer and Jeffrey F. Eastman and James N.
                     England",
  title           = "A fast display-oriented processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "17--22",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Eastman:1974:CDC,
  author          = "Jeffrey F. Eastman and John Staudhammer",
  title           = "Computer display of colored three-dimensional
                     objects",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "23--27",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Kerr:1974:MPI,
  author          = "Henry D. Kerr",
  title           = "A microprogrammed processor for interactive computer
                     graphics",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "28--33",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Armstrong:1974:FMT,
  author          = "C. V. W. Armstrong",
  title           = "Functional memory techniques applied to the
                     microprogrammed control of an associative processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "34--40",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Wade:1974:IDM,
  author          = "James F. Wade and Paul D. Stigall",
  title           = "Instruction design to minimize program size",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "41--44",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Bondi:1974:HHM,
  author          = "James O. Bondi and Paul D. Stigall",
  title           = "{HMO}, a hardware microcode optimizer",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "45--51",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Peskin:1974:CAD,
  author          = "A. M. Peskin",
  title           = "The computer aided design of processor architectures",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "51--55",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Huen:1974:IPR,
  author          = "W. H. Huen and D. P. Siewiorek",
  title           = "Intermodule protocol for register transfer level
                     modules: representation and analytic tools",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "56--62",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Isaacson:1974:PSP,
  author          = "Portia Isaacson",
  title           = "Picture systems, {PS}, and the design of a
                     channel-to-channel computer interface",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "63--70",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Lofgren:1974:RCT,
  author          = "Lennart L{\"o}fgren",
  title           = "Reference concepts in a tree structured address
                     space",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "71--79",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Anderson:1974:VMM,
  author          = "Judith A. Anderson and G. J. Lipovski",
  title           = "A virtual memory for microprocessors",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "80--84",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Brundage:1974:PED,
  author          = "R. E. Brundage and A. P. Batson",
  title           = "The performance enhancement of descriptor-based
                     virtual memory systems through the use of associative
                     registers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "85--90",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Marvel:1974:SSP,
  author          = "Orin E. Marvel",
  title           = "{SPEAC}: special purpose electronic area correlator",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "91--94",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Satterfield:1974:AAS,
  author          = "James M. Satterfield",
  title           = "Architectural advances of the space shuttle orbiter
                     avionics computer system",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "95--98",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Kodres:1974:DSA,
  author          = "Uno R. Kodres and William L. McCracken",
  title           = "Design study of an avionics navigation microcomputer",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "99--105",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Kane:1974:ISI,
  author          = "Gerald R. Kane",
  title           = "An iteratively structured information processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "106--112",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Richards:1974:HSI,
  author          = "H. {Richards, Jr.} and A. E. Oldehoeft",
  title           = "Hardware-software interactions in {SYMBOL-2R}'s
                     operating system",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "113--118",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Sylvain:1974:DEA,
  author          = "Pierre Sylvain and Maniel Vineberg",
  title           = "The design and evaluation of the array machine: a
                     high-level language processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "119--125",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Dennis:1974:PAB,
  author          = "Jack B. Dennis and David P. Misunas",
  title           = "A preliminary architecture for a basic data-flow
                     processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "126--132",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Berkling:1974:RLR,
  author          = "K. J. Berkling",
  title           = "Reduction languages for reduction machines",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "133--140",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{King:1974:ODS,
  author          = "Willis K. King and Fulvio Carbonaro",
  title           = "Output devices sharing by minicomputers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "141--145",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Rannem:1974:RSC,
  author          = "S. Rannem and V. C. Hamacher and S. G. Zaky and P.
                     Connolly",
  title           = "On relating small computer performance to design
                     parameters",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "146--151",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Lawson:1974:ASH,
  author          = "Harold W. {Lawson, Jr.} and Bengt Magnhagen",
  title           = "Advantages of structured hardware",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "152--158",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Kornerup:1974:CMS,
  author          = "Peter Kornerup",
  title           = "Concepts of the {MATHILDA} system",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "159--164",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Foster:1974:S,
  author          = "Caxton C. Foster",
  title           = "{SOCRATES}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "165--169",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Wann:1974:CCS,
  author          = "Donald F. Wann and Robert A. Ellis",
  title           = "Conjoined computer systems: an architecture for
                     laboratory data processing and instrument control",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "170--175",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Jensen:1974:DFC,
  author          = "E. Douglas Jensen",
  title           = "A distributed function computer for real-time
                     control",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "176--182",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Radoy:1974:SMI,
  author          = "C. H. Radoy and G. J. Lipovski",
  title           = "Switched multiple instruction, multiple data stream
                     processing",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "183--187",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Lechner:1974:SED,
  author          = "Robert J. Lechner",
  title           = "Sequentially encoded data structures that support
                     bidirectional scanning",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "188--194",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Freeman:1974:ICE,
  author          = "Martin Freeman",
  title           = "An instruction class for an extensible interpreter",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "195--200",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Giloi:1974:SCC,
  author          = "W. K. Giloi and H. Berg",
  title           = "{STARLET}: a computer concept based on ordered sets as
                     primitive data types",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "201--206",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Cornell:1974:CGP,
  author          = "R. G. Cornell and H. C. Torng",
  title           = "A cellular general purpose computer",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "207--213",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Goldstein:1974:MOR,
  author          = "Barry C. Goldstein and Thomas W. Scrutchin",
  title           = "A machine-oriented resource management architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "214--219",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Sloan:1974:DOC,
  author          = "M. E. Sloan",
  title           = "A design-oriented computer engineering program",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "220--224",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Baron:1974:ELC,
  author          = "Janis Beitch Baron and D. E. Atkins",
  title           = "An educational laboratory in contemporary digital
                     design",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "3",
  number          = "4",
  pages           = "225--231",
  month           = dec,
  year            = "1974",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:04 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
%% Author's given name spelled out to agree with the companion entry
%% Smith:1975:ACFb (same AADC computer family architecture series), so that
%% both entries sort and abbreviate identically.
@Article{Smith:1975:ACFa,
  author          = "William R. Smith",
  title           = "{AADC} computer family architecture program",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "1",
  pages           = "4--8",
  month           = mar,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:40:30 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Lunde:1975:MDW,
  author          = "{\AA}mund Lunde",
  title           = "More data on the {O/W} ratios: a note on a paper by
                     {Flynn}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "1",
  pages           = "9--13",
  month           = mar,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:40:30 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
%% Author list repaired: the paper has two authors, G. Jack Lipovski and
%% Stanley Y. W. Su.  The field previously split the second name into
%% separate fragments, which BibTeX parsed as three authors.
@Article{Lipovski:1975:NNA,
  author          = "G. Jack Lipovski and Stanley Y. W. Su",
  title           = "On non-numeric architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "1",
  pages           = "14--29",
  month           = mar,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:40:30 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
%% Stray period after the full given name removed from the author field;
%% only the middle initial is an abbreviation.
@Article{Boulaye:1975:SDS,
  author          = "Guy G. Boulaye",
  title           = "Structured design for structured computer
                     architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "2",
  pages           = "8--17",
  month           = jun,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:40:42 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Parnas:1975:ECA,
  author          = "D. L. Parnas",
  title           = "Evaluation criteria for abstract machines with unknown
                     applications",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "3",
  pages           = "2--9",
  month           = sep,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:02 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "Special issue: The AADC computer family architecture
                     project",
}
@Article{Smith:1975:ACFb,
  author          = "William R. Smith",
  title           = "{AADC} computer family architecture questions and
                     answers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "3",
  pages           = "15--21",
  month           = sep,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:02 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "Special issue: The AADC computer family architecture
                     project",
}
@Article{Su:1975:ICC,
  author          = "Stephen Y. H. Su",
  title           = "An introduction to {CHDL} (computer hardware
                     description languages)",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "3",
  pages           = "22--23",
  month           = sep,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:02 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Doran:1975:ICL,
  author          = "R. W. Doran",
  title           = "The {International Computers Ltd. ICL2900} computer
                     architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "3",
  pages           = "24--47",
  month           = sep,
  year            = "1975",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:02 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Bell:1976:CSW,
  author          = "Gordon Bell and William D. Strecker",
  title           = "Computer structures: {What} have we learned from the
                     {PDP-11}?",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "1--14",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Kerner:1976:PLL,
  author          = "Helmut Kerner and Werner Beyerle",
  title           = "A {PMS} level language for performance evaluation
                     modelling {(V-PMS)}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "15--19",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Moalla:1976:DTM,
  author          = "M. Moalla and G. Saucier and J. Sifakis and M.
                     Zachariades",
  title           = "A design tool for the multilevel description and
                     simulation of systems of interconnected modules",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "20--27",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Allen:1976:CCS,
  author          = "Jonathan Allen",
  title           = "A course in computer structures",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "28--32",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Rossmann:1976:ICS,
  author          = "George E. Rossmann",
  title           = "The {IEEE Computer Society} task force on computer
                     architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "33--33",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Widdoes:1976:MMM,
  author          = "Lawrence C. {Widdoes, Jr.}",
  title           = "The {Minerva} multi-microprocessor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "34--39",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Arnold:1976:HRM,
  author          = "R. G. Arnold and E. W. Page",
  title           = "A hierarchical, restructurable multi-microprocessor
                     architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "40--45",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{McGill:1976:MAN,
  author          = "Robert McGill and John Steinhoff",
  title           = "A multimicroprocessor approach to numerical analysis:
                     {An} application to gaming problems",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "46--51",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Jensen:1976:MIS,
  author          = "John E. Jensen and Jean-Loup Baer",
  title           = "A model of interference in a shared resource
                     multiprocessor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "52--57",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Leung:1976:CSF,
  author          = "Clement K. C. Leung and David P. Misunas and Andrij
                     Neczwid and Jack B. Dennis",
  title           = "A computer simulation facility for packet
                     communication architecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "58--63",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Rege:1976:CPS,
  author          = "S. L. Rege",
  title           = "Cost, performance and size tradeoffs for different
                     levels in a memory hierarchy",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "64--67",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Dworak:1976:IIR,
  author          = "Paul E. Dworak and Alice C. Parker",
  title           = "An input interface for a real-time digital sound
                     generation system",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "68--73",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Mulder:1976:MOD,
  author          = "Michael C. Mulder and Patrick P. Fasang",
  title           = "A microprocessor oriented data acquisition and control
                     system for power system control",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "4",
  number          = "4",
  pages           = "74--78",
  month           = jan,
  year            = "1976",
  CODEN           = "CANED2",
  ISSN            = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L          = "0163-5964",
  bibdate         = "Fri May 12 09:41:06 MDT 2006",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Gladney:1976:MRT,
  author =       "H. M. Gladney and G. Hochweller",
  title =        "Multiprogramming for real-time applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "79--85",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kehl:1976:BAH,
  author =       "Theodore H. Kehl",
  title =        "{Basil} architecture --- an {HLL} minicomputer",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "86--92",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lawson:1976:FDC,
  author =       "Harold W. {Lawson, Jr.}",
  title =        "Function distribution in computer system
                 architectures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "93--97",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Vissers:1976:IDA,
  author =       "Chris A. Vissers",
  title =        "Interface, a dispersed architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "98--104",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:1976:DSS,
  author =       "A. Thomasian and A. Avizienis",
  title =        "A design study of a shared resource computing system",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "105--112",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ford:1976:HSI,
  author =       "W. S. Ford and V. C. Hamacher",
  title =        "Hardware support for inter-process communication and
                 processor sharing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "113--118",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Trambacz:1976:TDP,
  author =       "Ulrich Trambacz and Georg Hyla",
  title =        "A taxonomy of display processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "119--120",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kluge:1976:TBT,
  author =       "W. E. Kluge",
  title =        "Traversing binary tree structures with shift register
                 memories (recent results)",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "121.1--121.1",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fernandez:1976:ASS,
  author =       "Eduardo B. Fernandez and Rita C. Summers and Charles
                 D. Coleman",
  title =        "Architectural support for system protection (recent
                 results)",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "121.2--121.2",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Gault:1976:DUP,
  author =       "James W. Gault and Alice C. Parker",
  title =        "The design of a user-programmable digital interface
                 (recent results)",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "121.3--121.3",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fournier:1976:SDG,
  author =       "Serge Fournier and Ming T. Liu",
  title =        "System design of a grammar-programmable high-level
                 language machine",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "122.4--122.4",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kuznia:1976:SSM,
  author =       "Ch. Kuznia and R. Kober and H. Kopp",
  title =        "{SMS 101} --- a structured multi microprocessor
                 system with deadlock-free operation scheme",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "122.5--122.5",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:1976:SSD,
  author =       "Philip S. Liu and Frederic J. Mowle",
  title =        "Selection schemes for dynamically microcoding
                 {Fortran} programs",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "122.6--122.6",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fuller:1976:DMM,
  author =       "S. H. Fuller and D. P. Siewiorek and R. J. Swan",
  title =        "The design of a multi-micro-computer system",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "123--123",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Reames:1976:DSD,
  author =       "Cecil C. Reames and Ming T. Liu",
  title =        "Design and simulation of the distributed loop computer
                 network {(DLCN)}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "124--129",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Franchi:1976:DFC,
  author =       "Paolo Franchi",
  title =        "Distribution of functions and control in {RPCNET}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "130--135",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wittie:1976:EMR,
  author =       "Larry D. Wittie",
  title =        "Efficient message routing in {Mega-Micro-Computer}
                 networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "136--140",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Welch:1976:IDO,
  author =       "Terry A. Welch",
  title =        "An investigation of descriptor oriented architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "141--146",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Feustel:1976:TAS,
  author =       "E. A. Feustel",
  title =        "Tagged architecture and the semantics of programming
                 languages: {Extensible} types",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "147--150",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Batson:1976:DDA,
  author =       "A. P. Batson and R. E. Brundage and J. P. Kearns",
  title =        "Design data for {Algol-60} machines",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "151--154",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Strecker:1976:CMP,
  author =       "William D. Strecker",
  title =        "Cache memories for {PDP-11} family computers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "155--158",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Patel:1976:ITP,
  author =       "Janak H. Patel and Edward S. Davidson",
  title =        "Improving the throughput of a pipeline by insertion of
                 delays",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "159--164",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Abd-Alla:1976:LAT,
  author =       "A. M. Abd-Alla and Laird H. Moffett",
  title =        "On-line architecture tuning using microcapture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "165--171",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Healy:1976:COC,
  author =       "Leonard D. Healy",
  title =        "A character-oriented context-addressed
                 segment-sequential storage",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "172--177",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bush:1976:SIS,
  author =       "J. A. Bush and G. J. Lipovski and S. Y. W. Su and J.
                 K. Watson and S. J. Ackerman",
  title =        "Some implementations of segment sequential functions",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "178--185",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{DeMartinis:1976:SMS,
  author =       "Manlio DeMartinis and G. Jack Lipovski and Stanley Y.
                 W. Su and J. K. Watson",
  title =        "A {Self Managing Secondary Memory} system",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "186--194",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fuller:1976:PPC,
  author =       "Samuel H. Fuller",
  title =        "Price\slash performance comparison of {C.mmp} and the
                 {PDP-10}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "4",
  number =       "4",
  pages =        "195--202",
  month =        jan,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:06 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thorelli:1976:RAC,
  author =       "Lars-Erik Thorelli",
  title =        "Representation of arrays in computers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "1",
  pages =        "6--9",
  month =        apr,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:26 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Berndt:1976:ECA,
  author =       "Helmut Berndt",
  title =        "Evolutionary computer architecture: the {Unidata
                 7.000} series",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "1",
  pages =        "10--16",
  month =        apr,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:26 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Dennis:1976:CAC,
  author =       "Jack B. Dennis",
  title =        "Computer architecture and the cost of software",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "1",
  pages =        "17--21",
  month =        apr,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:26 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lindamood:1976:NCA,
  author =       "George Lindamood",
  title =        "On navel contemplation and the art of computer
                 maintenance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "1",
  pages =        "22--23",
  month =        apr,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:26 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fuller:1976:IMS,
  author =       "S. H. Fuller and G. A. Mathew",
  title =        "Implementing microprogram storage with {PLA}'s",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "2",
  pages =        "6--11",
  month =        jun,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:42 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Hicks:1976:GQS,
  author =       "D. R. Hicks",
  title =        "A generalized queue scheme for process synchronization
                 and communication",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "2",
  pages =        "12--14",
  month =        jun,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:42 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Langdon:1976:BRR,
  author =       "Glen G. Langdon",
  title =        "Book reviews: Review of {{\em Introduction to Computer
                 Architecture\/}} by {Harold S. Stone}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "2",
  pages =        "17--19",
  month =        jun,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:42 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thurber:1976:ANR,
  author =       "Kenneth J. Thurber",
  title =        "{ARPS}: a new real-time computer",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "4",
  pages =        "6--16",
  month =        oct,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:09 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Salisbury:1976:MMC,
  author =       "Alan B. Salisbury",
  title =        "{MCF}: a military computer family for computer-based
                 systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "4",
  pages =        "17--20",
  month =        oct,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:09 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ris:1976:UDF,
  author =       "Frederic N. Ris",
  title =        "A unified decimal floating-point architecture for the
                 support of high-level languages",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "4",
  pages =        "21--31",
  month =        oct,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:09 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1976:QS,
  author =       "G. Jack Lipovski",
  title =        "A question of style",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "4",
  pages =        "32--38",
  month =        oct,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:09 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Chroust:1976:DIV,
  author =       "G. Chroust",
  title =        "Data interfaces versus control interfaces: a
                 half-baked conjecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "4",
  pages =        "39--40",
  month =        oct,
  year =         "1976",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:09 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Langdon:1977:CFM,
  author =       "Glen G. Langdon",
  title =        "Considerations on the ``figure of merit'' technique
                 for storage hierarchy design",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "6",
  pages =        "25--28",
  month =        feb,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:28 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Miller:1977:BRRb,
  author =       "Edward F. Miller",
  title =        "Book reviews: Review of {{\em High-Level Language
                 Computer Architecture\/}} by {Yaohan Chu. Academic
                 Press, New York, 1975}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "6",
  pages =        "29--29",
  month =        feb,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:28 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Chu:1977:AHD,
  author =       "Yaohan Chu",
  title =        "Architecture of a hardware data interpreter",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "1--9",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Dasgupta:1977:DSL,
  author =       "Subrata Dasgupta",
  title =        "The design of some language constructs for horizontal
                 microprogramming",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "10--16",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Jensen:1977:HMM,
  author =       "E. Douglas Jensen and Richard Y. Kain",
  title =        "The {Honeywell Modular Microprogram Machine}: {M3}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "17--28",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ramseyer:1977:MMI,
  author =       "Richard R. Ramseyer and Andries van Dam",
  title =        "A multi-microprocessor implementation of a general
                 purpose pipelined {CPU}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "29--34",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ravi:1977:HMS,
  author =       "C. V. Ravi and Torben Moller",
  title =        "A hierarchical microcomputer system for hardware and
                 software development",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "35--40",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Harris:1977:HMO,
  author =       "J. Archer Harris and David R. Smith",
  title =        "Hierarchical multiprocessor organizations",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "41--48",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
%%% NOTE(review): the surname ``Hurakami'' may be a misspelling of
%%% ``Murakami''; verify against the printed issue before changing.
@Article{Hurakami:1977:PPS,
  author =       "K. Hurakami and S. Nishikawa and M. Sato",
  title =        "{Poly-Processor System} analysis and design",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "49--56",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Mazare:1977:FEH,
  author =       "Guy Mazare",
  title =        "A few examples of how to use a symmetrical
                 multi-micro-processor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "57--62",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kogge:1977:MPP,
  author =       "Peter M. Kogge",
  title =        "The microprogramming of pipelined processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "63--69",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Siegel:1977:UVT,
  author =       "Howard Jay Siegel",
  title =        "The universality of various types of {SIMD} machine
                 interconnection networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "70--79",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Rau:1977:EIF,
  author =       "Ramakrishna B. Rau and George E. Rossmann",
  title =        "The effect of instruction fetch strategies upon the
                 performance of pipelined instruction units",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "80--89",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ahuja:1977:MMS,
  author =       "S. R. Ahuja and J. R. Jump",
  title =        "A modular memory scheme for array processing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "90--94",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Haynes:1977:AAC,
  author =       "Leonard S. Haynes",
  title =        "The architecture of an {ALGOL 60} computer implemented
                 with distributed processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "95--104",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sullivan:1977:LSHa,
  author =       "Herbert Sullivan and T. R. Bashkow",
  title =        "A large scale, homogeneous, fully distributed parallel
                 machine, {I}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "105--117",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sullivan:1977:LSHb,
  author =       "Herbert Sullivan and Theodore R. Bashkow and David
                 Klappholz",
  title =        "A large scale, homogeneous, fully distributed parallel
                 machine, {II}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "118--124",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1977:VMM,
  author =       "G. Jack Lipovski",
  title =        "On virtual memories and micronetworks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "5",
  number =       "7",
  pages =        "125--134",
  month =        mar,
  year =         "1977",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:30 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Strauss:1977:CNT,
author = "Jon C. Strauss and Kenneth J. Thurber",
title = "Considerations for new tactical computer systems",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "135--140",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thurber:1977:ATC,
author = "Kenneth J. Thurber and Peter C. Patton and Robert C.
Deward and Jon C. Strauss and Thomas W. Petschauer",
title = "An advanced tactical computer concept",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "141--146",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nutt:1977:MIP,
author = "Gary J. Nutt",
title = "Microprocessor implementation of a parallel
processor",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "147--152",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dworak:1977:DIR,
author = "Paul Dworak and Alice C. Parker and Richard Blum",
title = "The design and implementation of a real-time sound
generation system",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "153--158",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parker:1977:HST,
author = "A. C. Parker and A. W. Nagle",
title = "Hardware\slash software tradeoffs in a variable word
width, variable queue length buffer memory",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "159--164",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Peuto:1977:ITM,
author = "Bernard L. Peuto and Leonard J. Shustek",
title = "An instruction timing model of {CPU} performance",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "165--178",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hoogendoorn:1977:RMI,
author = "Cornelis H. Hoogendoorn",
title = "Reduction of memory interference in multiprocessor
systems",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "179--183",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hammerstrom:1977:ICC,
author = "D. W. Hammerstrom and E. S. Davidson",
title = "Information content of {CPU} memory referencing
behavior",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "184--192",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:1977:MCP,
author = "Ming T. Liu and Cecil C. Reames",
title = "Message communication protocol and operating system
design for the {Distributed Loop Computer Network
(DLCN)}",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "193--200",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Poujoulat:1977:ACB,
author = "G. H. Poujoulat",
title = "Architecture of the {CORAIL} building block system",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "201--204",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tredennick:1977:HSB,
author = "H. L. Tredennick and T. A. Welch",
title = "High-speed buffering for variable length operands",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "7",
pages = "205--210",
month = mar,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steel:1977:AGP,
author = "Rod Steel",
title = "Another general purpose computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "8",
pages = "5--11",
month = apr,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lindamood:1977:WN,
author = "George E. Lindamood",
title = "What's in a name?",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "8",
pages = "12--14",
month = apr,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schneiker:1977:MF,
author = "Conrad Schneiker",
title = "The microprocessors of the future",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "8",
pages = "15--16",
month = apr,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Miller:1977:BRR,
author = "Edward F. {Miller, Jr.}",
title = "Book review: Review of {{\em Large-Scale Computer
Architecture: Parallel and Associative Processors\/}}
by {Kenneth J. Thurber, Hayden Book Company, Rochelle
Park, New Jersey 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "5",
number = "8",
pages = "17--17",
month = apr,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Conner:1977:IOC,
author = "William M. Conner and Edward R. Dirling",
title = "Input\slash Output considerations in look-ahead
processing",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "1",
pages = "7--12",
month = jun,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rosin:1977:SM,
author = "Robert F. Rosin",
title = "The significance of microprogramming",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "1",
pages = "14--19",
month = jun,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gonzalez:1977:BRR,
author = "Mario J. Gonzalez",
title = "Book review: Review of {{\em Microprogramming
Primer\/}} by {Harry Katzan, Jr., McGraw-Hill 1977}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "1",
pages = "29--30",
month = jun,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vineberg:1977:ICS,
author = "Maniel Vineberg",
title = "Implementation of character string pattern matching on
a multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "1--7",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bird:1977:APP,
author = "R. M. Bird and J. C. Tu and R. M. Worthy",
title = "Associative\slash parallel processors for searching
very large textual data bases",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "8--9",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1977:IFT,
author = "G. Jack Lipovski",
title = "On imaginary fields, token transfers and floating
codes in intelligent secondary memories",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "17--22",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zaky:1977:MNN,
author = "S. G. Zaky",
title = "Microprocessors for non-numeric processing",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "23--30",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsiao:1977:ADC,
author = "David K. Hsiao and Krishnamurthi Kannan",
title = "The architecture of a database computer --- a
summary",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "31--33",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rosenthal:1977:DMM,
author = "Robert S. Rosenthal",
title = "The data management machine, a classification",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "35--39",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McDonell:1977:TNS,
author = "Ken J. McDonell",
title = "Trends in non-software support for input-output
functions",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "40--47",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cerretti:1977:UIP,
author = "R. Cerretti and D. Jasilli and D. R. Matteucci",
title = "{Ulisse}: {An Italian} project for a multifunctional
terminal system",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "48--50",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bray:1977:DMR,
author = "Olin H. Bray",
title = "Data management requirements: {The} similarity of
memory management, database systems, and message
processing",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "68--76",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Landson:1977:CSA,
author = "Barry M. Landson and Robert G. Sargent",
title = "A comparison of sequential and associative computing of
priority queues",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "2",
pages = "77--78",
month = may,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Myers:1977:CAS,
author = "Glenford J. Myers",
title = "The case against stack-oriented instruction sets",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "3",
pages = "7--10",
month = aug,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanenbaum:1977:AMA,
author = "Andrew S. Tanenbaum",
title = "Ambiguous machine architecture and program
efficiency",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "3",
pages = "11--13",
month = aug,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hicks:1977:MCA,
author = "D. R. Hicks",
title = "Microprogramming with a content-addressable
read-only-memory",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "3",
pages = "14--15",
month = aug,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hicks:1977:MPS,
author = "D. R. Hicks",
title = "Multitasking as a program structuring primitive",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "3",
pages = "16--18",
month = aug,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chroust:1977:BRR,
author = "G. Chroust",
title = "Book reviews: Review of {{\em Digital System
Implementation\/}} by {Gerrit A. Blaauw, Prentice Hall,
Series in Automatic Computation 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "4",
pages = "27--28",
month = oct,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:09 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hagan:1977:VMS,
author = "R. A. Hagan and C. S. Wallace",
title = "A virtual memory system for the {Hewlett Packard
2100A}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "5",
pages = "5--13",
month = dec,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Baskett:1977:MMF,
author = "Forest Baskett",
title = "More on microprocessors of the future",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "5",
pages = "14--17",
month = dec,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chu:1977:DEC,
author = "Yaohan Chu",
title = "Direct-execution computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "5",
pages = "18--23",
month = dec,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schulthess:1977:RCA,
author = "Peter U. Schulthess and Eduard P. Mumprecht",
title = "Reply to the case against stack-oriented instruction
sets",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "5",
pages = "24--27",
month = dec,
year = "1977",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mountain:1978:AMC,
author = "John B. Mountain and Philip H. Enslow",
title = "Application of the military computer family
architecture selection criteria to the {PR1ME P400}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "6",
pages = "3--17",
month = feb,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1978:JFM,
author = "G. Jack Lipovski",
title = "Just a few more words on microprocessors of the
future",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "6",
pages = "18--21",
month = feb,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keedy:1978:USE,
author = "J. L. Keedy",
title = "On the use of stacks in the evaluation of
expressions",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "6",
pages = "22--28",
month = feb,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanenbaum:1978:RPA,
author = "Andrew S. Tanenbaum",
title = "Review of {{\em Processor Architecture\/}} by {S. H.
Lavington, NCC Publications, Manchester 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "6",
pages = "31--31",
month = feb,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Whiteside:1978:BRR,
author = "A. E. Whiteside",
title = "Book reviews: Review of {{\em The Architecture of
Concurrent Programs\/}} by {Per Brinch Hansen,
Prentice-Hall 1977}",
journal = j-COMP-ARCH-NEWS,
volume = "6",
number = "6",
pages = "32--32",
month = feb,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhandarkar:1978:STT,
author = "Dileep P. Bhandarkar and J. Egil Juliussen",
title = "Semiconductor technology: trends and implications",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "4--14",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Payne:1978:CCD,
author = "A. J. Payne",
title = "A computer console design to help the operator",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "15--22",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McGlynn:1978:RCA,
author = "Daniel R. McGlynn",
title = "Review of {{\em Content Addressable Parallel
Processors\/}} by {Caxton C. Foster. Van Nostrand
Reinhold Co. 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "23--23",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ramamoorthy:1978:RSC,
author = "C. V. Ramamoorthy",
title = "Review of {{\em Structured Computer Organization\/}}
by {Andrew S. Tanenbaum, Prentice-Hall 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "23--23",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Buchholz:1978:RCS,
author = "W. Buchholz",
title = "Review of {{\em Computer System Architecture\/}} by
{M. Morris Mano, Prentice-Hall 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "24--24",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vranesic:1978:BRR,
author = "Z. G. Vranesic",
title = "Book reviews: Review of {{\em Content Addressable
Parallel Processors\/}} by {Caxton C. Foster, Van
Nostrand Reinhold Co. 1976}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "1",
pages = "24--24",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Korfhage:1978:DPU,
author = "R. R. Korfhage and W. H. E. Day and L. L. Beck and W.
F. Appelbe",
title = "Data physics: an unorthodox view of data and its
implications in data processors",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "1--7",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Copeland:1978:SSS,
author = "George P. Copeland",
title = "String storage and searching for data base
applications: implementation on the {INDY} backend
kernel",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "8--17",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Otis:1978:ERD,
author = "Allen J. Otis and George P. Copeland",
title = "Editing requirements for data base applications and
their implementation on the {INDY} backend kernel",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "18--29",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1978:SPI,
author = "G. Jack Lipovski",
title = "Semantic paging on intelligent discs",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "30--34",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Williams:1978:MSD,
author = "Rhon Williams",
title = "A multiprocessing system for the direct execution of
{LISP}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "35--41",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bird:1978:TFI,
author = "R. M. Bird and J. B. Newsbaum and J. L. Trefftzs",
title = "Text file inversion: an evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "42--50",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Roberts:1978:SCA,
author = "David C. Roberts",
title = "A specialized computer architecture for text
retrieval",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "51--59",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stucki:1978:CCA,
author = "M. J. Stucki and J. R. Cox and G. C. Roman and P. N.
Turcu",
title = "Coordinating concurrent access in a distributed
database architecture",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "60--64",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gouda:1978:HCC,
author = "Mohamed G. Gouda",
title = "A hierarchical controller for concurrent accessing of
distributed databases",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "65--70",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gavish:1978:EAD,
author = "Bezalel Gavish and Harvey Koch",
title = "An extensible architecture for data flow processing",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "71--76",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Harvill:1978:FPO,
author = "J. B. Harvill",
title = "Functional parallelism in an operand state saving
computer",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "77--84",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hutchison:1978:MM,
author = "J. S. Hutchison and W. G. Roman",
title = "Madman machine",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "85--90",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Banerjee:1978:UDM,
author = "Jayanta Banerjee and David K. Hsiao",
title = "The use of a database machine for supporting
relational databases",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "91--98",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sadowski:1978:EPR,
author = "Paul J. Sadowski and S. A. Schuster",
title = "Exploiting parallelism in a {Relational Associative
Processor}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "99--109",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chang:1978:BRD,
author = "Hsu Chang",
title = "Bubbles for relational database",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "110--116",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{ElMasri:1978:MIR,
author = "A. {El Masri} and J. Rohmer and D. Tusera",
title = "A machine for information retrieval",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "117--120",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matteucci:1978:DSA,
author = "Dante R. Matteucci",
title = "A distributed structure for the automization of the
{Catalog of the National Cultural Heritage}:
experiences and proposals",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "2",
pages = "121--133",
month = aug,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:41 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thurber:1978:CCT,
author = "Kenneth J. Thurber",
title = "Computer communication techniques",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "3",
pages = "7--16",
month = oct,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:02 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jennings:1978:VP,
author = "Hal W. Jennings",
title = "A variation on the {PDP 11}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "3",
pages = "17--26",
month = oct,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:02 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hansen:1978:MAC,
author = "Per {Brinch Hansen}",
title = "Multiprocessor architectures for concurrent programs",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "4",
pages = "4--23",
month = dec,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keedy:1978:EEU,
author = "J. L. Keedy",
title = "On the evaluation of expressions using accumulators,
stacks and store-to-store instructions",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "4",
pages = "24--27",
month = dec,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chattergy:1978:CL,
author = "Rahul Chattergy",
title = "In the current literature",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "4",
pages = "30--30",
month = dec,
year = "1978",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cragon:1979:ECS,
author = "Harvey G. Cragon",
title = "An evaluation of code space requirements and
performance of various architectures",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "5",
pages = "5--21",
month = feb,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thurber:1979:BLC,
author = "Kenneth J. Thurber and Harvey A. Freeman",
title = "A bibliography of local computer network
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "5",
pages = "22--27",
month = feb,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cox:1979:NCA,
author = "Lyle A. {Cox, Jr.}",
title = "The nature of ``computer architecture''",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "7",
pages = "8--12",
month = apr,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{vandeSnepscheut:1979:INP,
author = "Jan L. A. van de Snepscheut and Gert A. Slavenburg",
title = "Introducing the notion of processes to hardware",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "7",
pages = "13--23",
month = apr,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Atkins:1979:RAC,
author = "D. E. Atkins",
title = "Review of {{\em Advances in Computer Architecture\/}}
by {Glenford J. Myers. Wiley-Interscience Division of
John Wiley and Sons 1978}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "7",
pages = "25--26",
month = apr,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bowyer:1979:BRS,
author = "Kevin W. Bowyer",
title = "Book review of {{\em The Structure of Computers and
Computations: Volume One\/}} by {David J. Kuck. John
Wiley \& Sons 1978}",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "7",
pages = "27--30",
month = apr,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gibson:1979:TOR,
author = "Randall Gibson and Paul Anderson",
title = "Technical overview of the {Renaissance Octobus}
system",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "8",
pages = "2--9",
month = jun,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stevenson:1979:EEM,
author = "Johan W. Stevenson and Andrew S. Tanenbaum",
title = "Efficient encoding of machine instructions",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "8",
pages = "10--17",
month = jun,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keedy:1979:MUS,
author = "J. L. Keedy",
title = "More on the use of stacks in the evaluation of
expressions",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "8",
pages = "18--22",
month = jun,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Quick:1979:IMP,
author = "G. E. Quick",
title = "Intelligent memory: ``a parallel processing
concept''",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "8",
pages = "23--28",
month = jun,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rivest:1979:BCA,
author = "Ronald L. Rivest",
title = "The {BLIZZARD} computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "9",
pages = "2--10",
month = aug,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keedy:1979:TPR,
author = "J. L. Keedy",
title = "A technique for passing reference parameters in an
information-hiding architecture",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "9",
pages = "11--15",
month = aug,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kavipurapu:1979:QAU,
author = "Krishna M. Kavipurapu and Dennis J. Frailey",
title = "Quantification of architectures using software
science",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "10",
pages = "2--6",
month = oct,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Turton:1979:PHS,
author = "Trevor Turton",
title = "A proposed high-speed computer design",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "10",
pages = "7--21",
month = oct,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Staff:1979:CL,
author = "{Computer Architecture News staff}",
title = "In the current literature",
journal = j-COMP-ARCH-NEWS,
volume = "7",
number = "10",
pages = "22--22",
month = oct,
year = "1979",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Richards:1980:CE,
author = "Dana Richards",
title = "On a {``Counter--Example''}",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "2",
pages = "2--3",
month = apr,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Denning:1980:WIC,
author = "Peter J. Denning",
title = "Why not innovations in computer architecture?",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "2",
pages = "4--7",
month = apr,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gerrity:1980:HDU,
author = "G. W. Gerrity",
title = "Hardware detection of undefined references",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "2",
pages = "8--11",
month = apr,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Denning:1980:MCS,
author = "Peter J. Denning and T. Don Dennis",
title = "On minimizing contention at semaphores",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "2",
pages = "12--19",
month = apr,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dennis:1980:BBD,
author = "Jack B. Dennis and G. Andrew Boughton and Clement K.
C. Leung",
title = "Building blocks for data flow prototypes",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "1--8",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Davidson:1980:MSM,
author = "Edward S. Davidson",
title = "A multiple stream microprocessor prototype system:
{AMP-1}",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "9--16",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Andre:1980:KAO,
author = "F. Andre and J. P. Ban{\^a}tre and H. Leroy and G.
Paget and F. Ployette and J. P. Routeau",
title = "{KENSUR}: An architecture oriented towards programming
languages translation",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "17--22",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kuhl:1980:DFT,
author = "J. G. Kuhl and S. M. Reddy",
title = "Distributed fault-tolerance for large multiprocessor
systems",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "23--30",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malek:1980:CCA,
author = "Miroslaw Malek",
title = "A comparison connection assignment for diagnosis of
multiprocessor systems",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "31--36",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Grosspietsch:1980:CTR,
author = "K. E. Grosspietsch and J. Kaiser and E. Nett",
title = "A concept for test and reconfiguration of a
fault-tolerant {VLSI} processor system",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "37--43",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brassard:1980:PBC,
author = "Jean-Paul Brassard and Jan Gecsei",
title = "Path building in cellular partitioning networks",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "44--50",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McMillen:1980:MMC,
author = "Robert J. McMillen and Howard Jay Siegel",
title = "{MIMD} machine communication using the augmented data
manipulator network",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "51--60",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shen:1980:FTC,
author = "John P. Shen and John P. Hayes",
title = "Fault tolerance of a class of connecting networks",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "61--71",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Coffman:1980:CBS,
author = "E. G. {Coffman, Jr.} and Kimming So",
title = "On the comparison between single and multiple
processor systems",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "72--79",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hamacher:1980:PCF,
author = "V. Carl Hamacher and Gerald S. Shedler",
title = "Performance of a collision-free local bus network
having asynchronous distributed control",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "80--87",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zuberek:1980:TPN,
author = "W. M. Zuberek",
title = "Timed {Petri} nets and preliminary performance
evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "88--96",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ditzel:1980:RHL,
author = "David R. Ditzel and David A. Patterson",
title = "Retrospective on high-level language computer
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "97--104",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sansonnet:1980:MLD,
author = "J. P. Sansonnet and M. Castan and C. Percebois",
title = "{M3L}: a list-directed architecture",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "105--112",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hibino:1980:PPG,
author = "Yasushi Hibino",
title = "A Practical Parallel Garbage Collection Algorithm and
Its Implementation",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "113--120",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
URL = "ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Compiler/garbage.collection.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Hardware assisted GC",
}
@Article{Treleaven:1980:MPR,
author = "Philip C. Treleaven and Geoffrey F. Mole",
title = "A multi-processor reduction machine for user-defined
reduction languages",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "121--130",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tobias:1980:SUM,
author = "Jeffrey M. Tobias",
title = "A single user multiprocessor incorporating processor
manipulation facilities",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "131--138",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Halstead:1980:MSD,
author = "Robert H. {Halstead, Jr.} and Stephen A. Ward",
title = "The {MuNet}: a scalable decentralized architecture for
parallel computation",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "139--145",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lampson:1980:PHP,
author = "Butler W. Lampson and Kenneth A. Pier",
title = "A processor for a high-performance personal computer",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "146--160",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Edwards:1980:MGN,
author = "D. B. G. Edwards and A. E. Knowles and J. V. Woods",
title = "{MU6-G}: a new design to achieve mainframe performance
from a mini-sized computer",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "161--167",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Batcher:1980:AMP,
author = "Kenneth E. Batcher",
title = "Architecture of a massively parallel processor",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "168--173",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Palmer:1980:IND,
author = "John Palmer",
title = "The {Intel 8087} numeric data processor",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "174--181",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kuhn:1980:EMA,
author = "Robert H. Kuhn",
title = "Efficient mapping of algorithms to single-stage
interconnections",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "182--189",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nassimi:1980:SRB,
author = "David Nassimi and Sartaj Sahni",
title = "A self routing {Benes} network",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "3",
pages = "190--195",
month = may,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:54:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{vonIssendorff:1980:ANF,
  author          = {H. von Issendorff and W. Gr{\"u}newald},
  title           = {An adaptable network for functional
                     distributed systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {196--201},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Riad:1980:CFC,
  author          = {Mokhtar Boshra Riad},
  title           = {A combination of field and current access
                     techniques for efficient and cost-effective
                     bubble memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {202--210},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Trivedi:1980:DLS,
  author          = {K. S. Trivedi},
  title           = {Designing linear storage hierarchies so as to
                     maximize reliability subject to cost and
                     performance constraints},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {211--217},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ahuja:1980:APP,
  author          = {Sudhir R. Ahuja and Charles S. Roberts},
  title           = {An associative\slash parallel processor for
                     partial match retrieval using superimposed codes},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {218--227},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ruggiero:1980:MBV,
  author          = {M. D. Ruggiero and S. G. Zaky},
  title           = {A microprocessor-based virtual memory system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {228--235},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Jagannathan:1980:TAI,
  author          = {Anand Jagannathan},
  title           = {A technique for the architectural
                     implementation of software subsystems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {236--244},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Berstis:1980:SPD,
  author          = {Viktors Berstis},
  title           = {Security and protection of data in the
                     {IBM System\slash 38}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {245--252},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hoffmann:1980:HIC,
  author          = {Miguel Garc{\'\i}a Hoffmann},
  title           = {Hardware implementation of communication
                     protocols: a formal approach},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {253--263},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Guillier:1980:ACF,
  author          = {P. Guillier and D. Slosberg},
  title           = {An architecture with comprehensive facilities
                     of inter-process synchronization and
                     communication},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {264--270},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lougheed:1980:CPP,
  author          = {Robert M. Lougheed and David L. McCubbrey},
  title           = {The cytocomputer: a practical pipelined image processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {271--277},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Halatsis:1980:ACM,
  author          = {C. Halatsis and A. van Dam and J. Joosten
                     and M. Letheren},
  title           = {Architectural considerations for a
                     microprogrammable emulating engine using
                     bit-slices},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {278--291},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Irwin:1980:OPS,
  author          = {Mary Jane Irwin and Don Heller},
  title           = {Online pipeline systems for recursive numeric computations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {292--299},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Foster:1980:DSP,
  author          = {M. J. Foster and H. T. Kung},
  title           = {Design of special-purpose {VLSI} chips:
                     Example and opinions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {300--307},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kumar:1980:SLC,
  author          = {Anshul Kumar and P. C. P. Bhatt},
  title           = {A structured language for {CAD} of digital systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {308--316},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hercksen:1980:HMS,
  author          = {Uwe Hercksen and Rainer Klar and
                     Wolfgang Klein{\"o}der},
  title           = {Hardware-measurements of storage access
                     conflicts in the processor array {EGPA(1)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {317--324},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tokoro:1980:HLM,
  author          = {Mario Tokoro and Kiichiro Tamaru and
                     Masaaki Mizuno and Masao Hori},
  title           = {A high level multi-lingual multiprocessor
                     {KMP\slash II}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {3},
  pages           = {325--333},
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:54:57 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Aupperle:1980:RIC,
  author          = {Ken Aupperle},
  title           = {A real innovation in computer architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {6--7},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Galloway:1980:AIR,
  author          = {John R. {Galloway, Jr.}},
  title           = {Architectural innovation round: round \#3},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {8--10},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sharp:1980:STD,
  author          = {John A. Sharp},
  title           = {Some thoughts on data flow architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {11--21},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Payne:1980:VFP,
  author          = {Mary Payne and Dileep Bhandarkar},
  title           = {{VAX} floating point: a solid foundation for
                     numerical computation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {22--33},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/641845.641849},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Sat Jun 24 12:02:21 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dickman:1980:TR,
  author          = {Lloyd Dickman},
  title           = {Treasurer's report},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {37--38},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Staff:1980:CLAa,
  author          = {{Computer Architecture News} staff},
  title           = {Current literature: abstracts of articles
                     of interest\ldots{}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {4},
  pages           = {48--48},
  month           = jun,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Davies:1980:CAM,
  author          = {Julian Davies},
  title           = {Clock architecture and management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {5},
  pages           = {3--6},
  month           = aug,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chroust:1980:RMO,
  author          = {G. Chroust and J. R. M{\"u}hlbacher},
  title           = {Rivalling multiprocessor organization:
                     a hardware\slash speed trade-off},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {5},
  pages           = {7--10},
  month           = aug,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stevenson:1980:RPI,
  author          = {David Stevenson},
  title           = {A report on the proposed
                     {IEEE Floating Point Standard (IEEE Task p754)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {5},
  pages           = {11--12},
  month           = aug,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rattner:1980:OBC,
  author          = {Justin Rattner and George Cox},
  title           = {Object-based computer architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {4--11},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Myers:1980:HIC,
  author          = {G. J. Myers and B. R. S. Buckingham},
  title           = {A hardware implementation of capability-based addressing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {12--24},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Patterson:1980:CRI,
  author          = {David A. Patterson and David R. Ditzel},
  title           = {The case for the reduced instruction set computer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {25--33},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Clark:1980:CCR,
  author          = {Douglas W. Clark and William D. Strecker},
  title           = {Comments on
                     {``The Case for the Reduced Instruction Set Computer,''}
                     by {Patterson} and {Ditzel}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {34--38},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Brakefield:1980:BAT,
  author          = {James C. Brakefield},
  title           = {Is 32 bits of address too much?},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {39--40},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Brakefield:1980:PB,
  author          = {James C. Brakefield},
  title           = {The peripheral bus},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {6},
  pages           = {41--43},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mudge:1980:BRR,
author = "Trevor Mudge",
title = "Book reviews: Review of {{\em The Structure of
Computers and Computation, Vol. I\/}} by {David J.
Kuck, John Wiley \& Sons 1978}",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "6",
pages = "44--45",
month = oct,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Staff:1980:CLAb,
author = "{Computer Architecture News} Staff",
title = "Current literature: abstracts of articles of
interest\ldots{}",
journal = j-COMP-ARCH-NEWS,
volume = "8",
number = "6",
pages = "46--46",
month = oct,
year = "1980",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reed:1980:WFC,
  author          = {Karl Reed},
  title           = {The way forward in computer architecture research},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {7},
  pages           = {3--7},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gilmore:1980:SEM,
  author          = {John Gilmore},
  title           = {Suggested enhancements to the {Motorola MC68000}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {7},
  pages           = {8--14},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wakerly:1980:PED,
  author          = {John F. Wakerly},
  title           = {{Pascal} extensions for describing
                     computer instruction sets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {7},
  pages           = {15--23},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kavi:1980:SA,
  author          = {Krishna M. Kavi},
  title           = {Semantics of an algorithm},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {7},
  pages           = {24--26},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Treleaven:1980:VMA,
  author          = {Philip C. Treleaven},
  title           = {{VLSI}: machine architecture and very high
                     level languages},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {8},
  number          = {7},
  pages           = {27--38},
  month           = oct,
  year            = {1980},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dickman:1981:SB,
  author          = {Lloyd Dickman},
  title           = {{SIGARCH} business},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {1},
  pages           = {7--8},
  month           = feb,
  year            = {1981},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:28 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DePrycker:1981:NIM,
  author          = {Martin L. {De Prycker}},
  title           = {A new index mode for the {VAX-11}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {2},
  pages           = {10--11},
  month           = apr,
  year            = {1981},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296940.1296941},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:58:05 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {One advantage of most high level languages over
                     machine languages consists of the availability of
                     concepts which are frequently used by most programmers.
                     One of these concepts is the array mechanism, where the
                     high level language generally provides three operations
                     associated with array manipulations: type-checking,
                     bounds-checking and address calculation.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stevenson:1981:PP,
author = "David Stevenson",
title = "The {Phoenix Project}",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "2",
pages = "12--15",
month = apr,
year = "1981",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1296940.1296942",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:58:05 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The Phoenix Project was an exploration of the issues
surrounding large scale scientific computing. It was
conducted at the Institute for Advanced Computation,
NASA-Ames Research Center at Moffett Field, California
from 1975 to 1979. The primary results of the project
were a sizing of the likely needs of large scale
scientific computing during the 1980s, what computing
technology could be available to meet those needs, a
conceptual design of a processor that could meet those
needs, and a programming language suitable for use by
this community on a parallel processor such as the one
proposed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{VanOost:1981:MPS,
  author          = {E. M. J. C. {Van Oost}},
  title           = {Multi-processor system description and
                     simulation using structured multi-programming
                     languages},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {2},
  pages           = {16--32},
  month           = apr,
  year            = {1981},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296940.1296943},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:58:05 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Most of the multi-processor systems designed for real
                     time control demand a high efficiency, compromising the
                     simplicity of the system. If this requirement imposes a
                     hardware implementation of most of the primitives of
                     the system, a complicated hardware will result. In
                     order to retain to some extent the ease of using
                     structured multi-programming languages, e.g. Concurrent
                     Pascal [1], we have used these languages for the
                     description and simulation of the complex hardware,
                     instead of using them for software implementation of
                     parallelism.\par
                     This approach is explained with examples taken from an
                     existing multi-processor system [2] developed at the
                     Brussels Free University (V.U.B.).},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wakerly:1981:BRR,
author = "John Wakerly",
title = "Book review: Review of {`The Computers that Saved
Metropolis, by DC Comics and Radio Shack', July 1980}",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "2",
pages = "33--34",
month = apr,
year = "1981",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1296940.1296945",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:58:05 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Arvind:1981:MPD,
author = "Arvind and V. Kathail",
title = "A Multiple Processor Data Flow Machine that Supports
Generalized Procedures",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "3",
pages = "291--302",
year = "1981",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibsource = "ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Compiler/Functional.bib;
http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "Proceedings of the 8th Annual Symposium on Computer
Architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "functional dataflow",
}
@Article{Gerrity:1981:PI,
  author          = {G. W. Gerrity},
  title           = {On processes and interrupts},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {4},
  pages           = {4--14},
  month           = jun,
  year            = {1981},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hill:1981:HMS,
  author          = {Dwight D. Hill},
  title           = {A hardware mechanism for supporting range checks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {4},
  pages           = {15--21},
  month           = jun,
  year            = {1981},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cherniavsky:1981:CMA,
  author          = {Vladimir S. Cherniavsky},
  title           = {The computing memory another distributed
                     computer architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {4},
  pages           = {22--24},
  month           = jun,
  year            = {1981},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thornton:1981:ASC,
  author          = {James E. Thornton},
  title           = {{8th Annual Symposium on Computer
                     Architecture: Heterogeneous Computer Architecture}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {4},
  pages           = {25--33},
  month           = jun,
  year            = {1981},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:07 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Staff:1981:ETP,
author = "{Computer Architecture News} Staff",
title = "Errata for two publications",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "4",
pages = "34--34",
month = jun,
year = "1981",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:07 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lindsay:1981:CMM,
  author          = {Donald C. Lindsay},
  title           = {Cache memory for microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {5},
  pages           = {6--13},
  month           = aug,
  year            = {1981},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296947.1296948},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:16 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {A growth path for current microprocessors is suggested
                     which includes bus enhancements and cache memories. The
                     implications are examined, and several differences from
                     the mainframe world are pointed out.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kavi:1981:IAC,
  author          = {Krishna M. Kavi},
  title           = {Innovative architectures and commercial
                     computers: a summary of the panel discussion at
                     {NCC 1981}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {9},
  number          = {5},
  pages           = {14--16},
  month           = aug,
  year            = {1981},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296947.1296949},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:16 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The session was held on May 4, 1981 in Chicago at NCC
                     1981. The panelists were Harvey Cragon, Pat Goldberg,
                     Dave Patterson, Justin Rattner, Dean Earnest and Peter
                     Denning. Krishna Kavi was the moderator. A complete
                     report of the session is available and can be obtained
                     by writing to the Computer Science Department, P. O.
                     Box 44330, U.S.L., Lafayette, LA 70504.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Jenevein:1981:EHS,
author = "R. M. Jenevein and D. DeGroot and G. Jack Lipovski",
title = "Errata: ``{A} hardware support mechanism for
scheduling resources in parallel machine environment'':
(from {Proceedings of the 8th Annual Symposium on
Computer Architecture}, p. 57)",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "5",
pages = "17--17",
month = aug,
year = "1981",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1296947.1296950",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 12:06:16 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuen:1981:EPS,
author = "C. K. Yuen",
title = "Extending the power of short-wordlength processors by
means of context-dependent machine instructions",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "6",
pages = "9--15",
month = oct,
year = "1981",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gottlieb:1981:CPP,
author = "Allan Gottlieb and Clyde P. Kruskal",
title = "Coordinating parallel processors: a partial
unification",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "6",
pages = "16--24",
month = oct,
year = "1981",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:1981:ESM,
author = "Anonymous",
title = "Errata: Structured machine design: an ongoing
experiment",
journal = j-COMP-ARCH-NEWS,
volume = "9",
number = "6",
pages = "25--25",
month = oct,
year = "1981",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McDowell:1982:PML,
author = "Charlie McDowell",
title = "Protection at the micromachine level",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "1",
pages = "4--8",
month = jan,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Feustel:1982:PPC,
author = "Edward A. Feustel",
title = "Protected procedure call on the {PRIME\TM} machines",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "1",
pages = "9--22",
month = jan,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{El-Halabi:1982:SRD,
author = "Hossam El-Halabi and Dharma P. Agrawal",
title = "Some remarks on direct execution computers",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "1",
pages = "23--27",
month = jan,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fitzpatrick:1982:RAV,
author = "Daniel T. Fitzpatrick and John K. Foderaro and Manolis
G. H. Katevenis and Howard A. Landman and David
A. Patterson and James B. Peek and Zvi Peshkess and
Carlo H. S{\'e}quin and Robert W. Sherburne and Korbin
S. {Van Dyke}",
title = "A {RISCy} approach to {VLSI}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "1",
pages = "28--32",
month = jan,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:29 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rattner:1982:HSC,
author = "Justin Rattner",
title = "Hardware\slash software cooperation in the
{iAPX-432}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "1--1",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hennessy:1982:HST,
author = "John Hennessy and Norman Jouppi and Forest Baskett and
Thomas Gross and John Gill",
title = "Hardware\slash software tradeoffs for increased
performance",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "2--11",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rymarczyk:1982:CGP,
author = "James W. Rymarczyk",
title = "Coding guidelines for pipelined processors",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "12--19",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnsson:1982:OMP,
author = "Richard K. Johnsson and John D. Wick",
title = "An overview of the {Mesa} processor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "20--29",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Berenbaum:1982:OSL,
author = "Alan D. Berenbaum and Michael W. Condry and Priscilla
M. Lu",
title = "The operating system and language support features of
the {BELLMAC\TM-32} microprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "30--38",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Radin:1982:M,
author = "George Radin",
title = "The 801 minicomputer",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "39--47",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ditzel:1982:RAF,
author = "David R. Ditzel and H. R. McLellan",
title = "Register allocation for free: {The C} machine stack
cache",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "48--56",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Harbison:1982:AAO,
author = "Samuel P. Harbison",
title = "An architectural alternative to optimizing compilers",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "57--65",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lampson:1982:FPC,
author = "Butler W. Lampson",
title = "Fast procedure calls",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "66--76",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jones:1982:SPM,
author = "Douglas W. Jones",
title = "Systematic protection mechanism design",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "77--80",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reed:1982:GPM,
author = "Karl Reed",
title = "On a general property of memory mapping tables",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "81--86",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cook:1982:EIO,
author = "Robert P. Cook and Nitin Donde",
title = "An experiment to improve operand addressing",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "87--91",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fusaoka:1982:CCH,
author = "Akira Fusaoka and Masaharu Hirayama",
title = "Compiler chip: a hardware implementation of compiler",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "92--95",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rau:1982:ASE,
author = "B. R. Rau and C. D. Glaeser and E. M. Greenawalt",
title = "Architectural support for the efficient generation of
code for horizontal architectures",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "96--99",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McLear:1982:GCD,
author = "R. E. McLear and D. M. Scheibelhut and E. Tammaru",
title = "Guidelines for creating a debuggable processor",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "100--106",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wilkes:1982:HSM,
author = "M. V. Wilkes",
title = "Hardware support for memory protection: {Capability}
implementations",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "107--116",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pollack:1982:SAM,
author = "Fred J. Pollack and George W. Cox and Dan W.
Hammerstrom and Kevin C. Kahn and Konrad K. Lai and
Justin R. Rattner",
title = "Supporting {Ada} memory management in the {iAPX-432}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "117--131",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sansonnet:1982:DEL,
author = "J. P. Sansonnet and M. Castan and C. Percebois and D.
Botella and J. Perez",
title = "Direct execution of {Lisp} on a list-directed
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "132--139",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:1982:SRA,
author = "Mark Scott Johnson",
title = "Some requirements for architectural support of
software debugging",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "140--148",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Middelburg:1982:EPA,
author = "C. A. Middelburg",
title = "The effect of the {PDP-11} architecture on code
generation for {CHILL}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "149--157",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sweet:1982:EAM,
author = "Richard E. Sweet and James G. {Sandman, Jr.}",
title = "Empirical analysis of the {Mesa} instruction set",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "158--166",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McDaniel:1982:AMI,
author = "Gene McDaniel",
title = "An analysis of a {Mesa} instruction set using dynamic
instruction frequencies",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "167--176",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wiecek:1982:CSV,
author = "Cheryl A. Wiecek",
title = "A case study of {VAX-11} instruction set usage for
compiler execution",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "177--184",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Maekawa:1982:FSA,
author = "Mamoru Maekawa and Ken Sakamura and Chiaki Ishikawa",
title = "Firmware structure and architectural support for
monitors, vertical migration and user
microprogramming",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "185--194",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kamibayashi:1982:HOS,
author = "N. Kamibayashi and H. Ogawana and K. Nagayama and H.
Aiso",
title = "{Heart}: an operating system nucleus machine
implemented by firmware",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "195--204",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ahuja:1982:MMA,
author = "Sudhir R. Ahuja and Abhaya Asthana",
title = "A multi-microprocessor architecture with hardware
support for communication and scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "2",
pages = "205--209",
month = mar,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:44 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Patterson:1982:RAH,
author = "David A. Patterson and Richard S. Piepho",
title = "{RISC} assessment: a high-level language experiment",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "3--8",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Clark:1982:MAI,
author = "Douglas W. Clark and Henry M. Levy",
title = "Measurement and analysis of instruction use in the
{VAX-11\slash 780}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "9--17",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kavi:1982:HAP,
author = "Krishna Kavi and Boumediene Belkhouche and Evelyn
Bullard and Lois Delcambre and Stephen Nemecek",
title = "{HLL} architectures: {Pitfalls} and predilections",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "18--23",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gottlieb:1982:NUD,
author = "Allan Gottlieb and Ralph Grishman and Clyde P. Kruskal
and Kevin P. McAuliffe and Larry Rudolph and Marc
Snir",
title = "The {NYU Ultracomputer}---designing a {MIMD},
shared-memory parallel machine (extended abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "27--42",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chu:1982:VAH,
author = "King-Hang Chu and King-Sun Fu",
title = "{VLSI} architectures for high speed recognition of
context-free languages and finite-state languages",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "43--49",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Franklin:1982:ACC,
author = "Mark A. Franklin and Donald F. Wann",
title = "Asynchronous and clocked control structures for {VLSI}
based interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "50--59",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McMillen:1982:PFT,
author = "Robert J. McMillen and Howard Jay Siegel",
title = "Performance and fault tolerance improvements in the
{Inverse Augmented Data Manipulator} network",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "63--72",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parker:1982:GNM,
author = "D. S. Parker and C. S. Raghavendra",
title = "The {Gamma} network: a multiprocessor interconnection
network with redundant paths",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "73--80",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jenevein:1982:CPR,
author = "R. M. Jenevein and J. C. Browne",
title = "A control processor for a reconfigurable array
computer",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "81--89",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhuyan:1982:GCP,
author = "Laxmi N. Bhuyan and Dharma P. Agrawal",
title = "A general class of processor interconnection
strategies",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "90--98",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burkowski:1982:ISD,
author = "F. J. Burkowski",
title = "Instruction set design issues relating to a static
dataflow computer",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "101--111",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1982:DAE,
author = "James E. Smith",
title = "Decoupled access\slash execute computer
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "112--119",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Caluwaerts:1982:DFA,
author = "L. J. Caluwaerts and J. Debacker and J. A.
Peperstraete",
title = "A data flow architecture with a paged memory system",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "120--127",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rau:1982:ECG,
author = "B. Ramakrishna Rau and Christopher D. Glaeser and
Raymond L. Picard",
title = "Efficient code generation for horizontal
architectures: {Compiler} techniques and architectural
support",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "131--139",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Barton:1982:SNH,
author = "Gene C. Barton",
title = "{Sentry}: a novel hardware implementation of classic
operating system mechanisms",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "140--147",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Abramovici:1982:LSM,
author = "M. Abramovici and Y. H. Levendel and P. R. Menon",
title = "A logic simulation machine",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "148--157",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dasgupta:1982:TFL,
author = "Subrata Dasgupta and Marius Olafsson",
title = "Towards a family of languages for the design and
implementation of machine architectures",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "158--167",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1982:RPD,
author = "Yann-Hang Lee and Kang G. Shin",
title = "Rollback propagation detection and performance
evaluation of {FTMR$^2$M}---a fault-tolerant
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "171--180",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:1982:DFT,
author = "Woei Lin and Chuan-lin Wu",
title = "Design of a $ 2 \times 2 $ fault-tolerant switching
element",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "181--189",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fussell:1982:FTW,
author = "Donald Fussell and Peter Varman",
title = "Fault-tolerant wafer-scale architectures for {VLSI}",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "190--198",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pramanik:1982:DF,
author = "Sakti Pramanik",
title = "Database filters",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "201--210",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tokoro:1982:SSI,
author = "Mario Tokoro and Takashi Takizuka",
title = "On the semantic structure of information --- a
proposal of the abstract storage architecture",
journal = j-COMP-ARCH-NEWS,
volume = "10",
number = "3",
pages = "211--217",
month = apr,
year = "1982",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:52 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dohi:1982:HSA,
  author = {Yasunori Dohi and Akira Suzuki and Noriyuki Matsui},
  title = {Hardware sorter and its application to data base
    machine},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {218--225},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Treleaven:1982:RCA,
  author = {Philip C. Treleaven and Richard P. Hopkins},
  title = {A recursive computer architecture for {VLSI}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {229--238},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Castan:1982:HRP,
  author = {M. Castan and E. I. Organick},
  title = {{$ \mu $3L}: an {HLL-RISC} processor for parallel
    execution of {FP}-language programs},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {239--247},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hommes:1982:HSC,
  author = {F. Hommes},
  title = {The heap\slash substitution concept --- an
    implementation of functional operations on data
    structures for a reduction machine},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {248--256},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Reynolds:1982:SRA,
  author = {Paul F. {Reynolds, Jr.}},
  title = {A shared resource algorithm for distributed
    simulation},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {259--266},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Jain:1982:DPT,
  author = {Bijendra N. Jain},
  title = {Duplication of packets and their detection in {X.25}
    communication protocols},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {267--273},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Markenscoff:1982:MPS,
  author = {Pauline Markenscoff},
  title = {A multiple processor system for real time control
    tasks},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {274--280},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Miller:1982:HMD,
  author = {Leslie Jill Miller},
  title = {A heterogeneous multiprocessor design and the
    distributed scheduling of its task group workload},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {283--290},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Goble:1982:DPV,
  author = {George H. Goble and Michael H. Marsh},
  title = {A dual processor {VAX 11\slash 780}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {291--298},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Dubois:1982:ECC,
  author = {Michel Dubois and Fay{\'e} A. Briggs},
  title = {Effects of cache coherency in multiprocessors},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {299--308},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Mudge:1982:PAC,
  author = {T. N. Mudge and B. A. Makrucki},
  title = {Probabilistic analysis of a crossbar switch},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {311--320},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Levitan:1982:FEN,
  author = {Steven P. Levitan and Caxton C. Foster},
  title = {Finding an extremum in a network},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {321--325},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Premkumar:1982:RAR,
  author = {U. V. Premkumar and J. C. Browne},
  title = {Resource allocation in rectangular {SW} banyans},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {326--333},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Anonymous:1982:LA,
  author = {Anonymous},
  title = {List of authors},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {3},
  pages = {335--335},
  month = apr,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:52 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Mayer:1982:ABB,
  author = {Alastair J. W. Mayer},
  title = {The architecture of the {Burroughs B5000}: 20 years
    later and still ahead of the times?},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {4},
  pages = {3--10},
  month = jun,
  year = {1982},
  CODEN = {CANED2},
  DOI = {https://doi.org/10.1145/641542.641543},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:07 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Brakefield:1982:OSA,
  author = {James C. Brakefield},
  title = {From the other side of the {Atlantic}: how to improve
    upon the {MU5} design},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {4},
  pages = {11--16},
  month = jun,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:07 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hansen:1982:PEI,
  author = {Paul M. Hansen and Mark A. Linton and Robert N. Mayo
    and Marguerite Murphy and David A. Patterson},
  title = {A performance evaluation of the {Intel iAPX 432}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {4},
  pages = {17--26},
  month = jun,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:07 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Huguet:1982:PPS,
  author = {Miquel Huguet},
  title = {The protection of the processor status word of the
    {PDP-11\slash 60}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {4},
  pages = {27--30},
  month = jun,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:07 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Brakefield:1982:JWO,
  author = {James Brakefield},
  title = {Just what is an op-code?: or a universal computer
    design},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {4},
  pages = {31--34},
  month = jun,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:07 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Knott:1982:FDA,
  author = {J. D. Knott and T. W. Crockett},
  title = {Fair dynamic arbitration for a multiprocessor
    communications bus},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {5},
  pages = {4--9},
  month = sep,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:25 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Larus:1982:CMA,
  author = {James R. Larus},
  title = {A comparison of microcode, assembly code, and
    high-level languages on the {VAX-11} and {RISC I}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {5},
  pages = {10--15},
  month = sep,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:25 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Patterson:1982:PEI,
  author = {David A. Patterson},
  title = {A performance evaluation of the {Intel 80286}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {5},
  pages = {16--18},
  month = sep,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:25 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Egan:1982:EVC,
  author = {Rod Egan},
  title = {The effect of {VLSI} on computer architecture},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {5},
  pages = {19--22},
  month = sep,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:25 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Benzie:1982:BRR,
  author = {Thomas Benzie},
  title = {Book reviews: Review of {{\em Microcomputer
    Architecture and Programming\/}} by {John F. Wakerly,
    John Wiley \& Sons, Inc., 1981}},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {5},
  pages = {23--23},
  month = sep,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:25 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Levy:1982:UBM,
  author = {Henry M. Levy and Douglas W. Clark},
  title = {On the use of benchmarks for measuring system
    performance},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {6},
  pages = {5--8},
  month = dec,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:26 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Schulthess:1982:ONA,
  author = {Peter Schulthess and Fritz Vonaesch},
  title = {{OPA}: a new architecture for {Pascal-like}
    languages},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {6},
  pages = {9--20},
  month = dec,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:26 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Brakefield:1982:TI,
  author = {James C. Brakefield},
  title = {Talk on interpreters},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {6},
  pages = {21--28},
  month = dec,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:26 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Doran:1982:MFC,
  author = {D. W. Doran},
  title = {Main frame computer trends},
  journal = j-COMP-ARCH-NEWS,
  volume = {10},
  number = {6},
  pages = {29--44},
  month = dec,
  year = {1982},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:26 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Gajski:1983:CLS,
  author = {Daniel Gajski and David Kuck and Duncan Lawrie and
    Ahmed Sameh},
  title = {{CEDAR}: a large scale multiprocessor},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {7--11},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{French:1983:TDF,
  author = {Elaine French and Hugh Glaser},
  title = {{TUKI}: a data flow processor},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {12--18},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Marovac:1983:SAD,
  author = {Nenad Marovac},
  title = {A systematic approach to the design and implementation
    of a computer instruction set},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {19--24},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Cragon:1983:EIS,
  author = {Harvey Cragon},
  title = {Executable instruction set specification},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {25--43},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Colwell:1983:PTR,
  author = {Robert P. Colwell and Charles Y. Hitchcock and E.
    Douglas Jensen},
  title = {Peering through the {RISC\slash CISC} fog: an outline
    of research},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {44--50},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Gorsline:1983:RAC,
  author = {G. W. Gorsline},
  title = {Review of {{\em Advances in Computer Architecture\/}}
    by {Glenford J. Myers, John Wiley \& Sons, Inc. 1982}},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {55--55},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Sachs:1983:BRR,
  author = {M. W. Sachs},
  title = {Book reviews: Review of {{\em Microcomputer
    Interfacing\/}} by {G. Jack Lipovski, Lexington Books
    1980}},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {1},
  pages = {55--55},
  month = mar,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Abramson:1983:HSP,
  author = {David Abramson and John Rosenberg},
  title = {Hardware support for program debuggers in a paged
    virtual memory},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {2},
  pages = {8--19},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:42 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Frailey:1983:WLC,
  author = {Dennis J. Frailey},
  title = {Word length of a computer architecture: definitions and
    applications},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {2},
  pages = {20--26},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:42 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hollaar:1983:BRR,
  author = {Lee A. Hollaar},
  title = {Book reviews: Review of {{\em Computer Design\/}} by
    {Glen G. Langdon, Computeach Press}},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {2},
  pages = {27--28},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:42 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Wilkes:1983:SPS,
  author = {Maurice V. Wilkes},
  title = {Size, power, and speed (keynote address)},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {2--4},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Giloi:1983:TTC,
  author = {W. K. Giloi},
  title = {Towards a taxonomy of computer architecture based on
    the machine data type view},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {6--15},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Avizienis:1983:FTF,
  author = {Algirdas Avi{\v{z}}ienis},
  title = {Framework for a taxonomy of fault-tolerance attributes
    in computer systems},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {16--21},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Pehrson:1983:CID,
  author = {Bj{\"o}rn Pehrson and Joachim Parrow},
  title = {Caddie --- an interactive design environment},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {24--31},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Dasgupta:1983:VCA,
  author = {Subrata Dasgupta},
  title = {On the verification of computer architectures using an
    architecture description language},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {32--38},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{King:1983:RSC,
  author = {Richard M. King},
  title = {Research on synthesis of concurrent computing systems
    (extended abstract)},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {39--46},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Fisher:1983:APP,
  author = {Allan L. Fisher and H. T. Kung and Louis M. Monier and
    Yasunori Dohi},
  title = {Architecture of the {PSC}---a programmable systolic
    chip},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {48--53},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Fisher:1983:SLV,
  author = {Allan L. Fisher and H. T. Kung},
  title = {Synchronizing large {VLSI} processor arrays},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {54--58},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Wagner:1983:BVM,
  author = {Robert A. Wagner},
  title = {The {Boolean Vector Machine [BVM]}},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {59--66},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Bonuccelli:1983:VTM,
  author = {M. A. Bonuccelli and E. Lodi and F. Luccio and P.
    Maestrini and L. Pagli},
  title = {A {VLSI} tree machine for relational data bases},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {67--73},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Caluwaerts:1983:ISD,
  author = {L. J. Caluwaerts and J. Debacker and J. A.
    Peperstraete},
  title = {Implementing streams on a data flow computer system
    with paged memory},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {76--83},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Requa:1983:PDF,
  author = {Joseph E. Requa},
  title = {The {Piecewise Data Flow} architecture: control flow
    and register management},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {84--89},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Tokoro:1983:WSC,
  author = {Mario Tokoro and J. R. Jagannathan and Hideki
    Sunahara},
  title = {On the working set concept for data-flow machines},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {90--97},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Marczynski:1983:DDS,
  author = {R. W. Marczy{\'n}ski and J. Milewski},
  title = {A data driven system based on a microprogrammed
    processor module},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {98--106},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Patterson:1983:AVI,
  author = {David A. Patterson and Phil Garrison and Mark Hill and
    Dimitris Lioupis and Chris Nyberg and Tim Sippel and
    Korbin {Van Dyke}},
  title = {Architecture of a {VLSI} instruction cache for a
    {RISC}},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {108--116},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Yeh:1983:PSC,
  author = {Phil C. C. Yeh and Janak H. Patel and Edward S.
    Davidson},
  title = {Performance of shared cache for parallel-pipelined
    computer systems},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {117--123},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Goodman:1983:UCM,
  author = {James R. Goodman},
  title = {Using cache memory to reduce processor-memory
    traffic},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {124--131},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Smith:1983:SIC,
  author = {James E. Smith and James R. Goodman},
  title = {A study of instruction cache organizations and
    replacement policies},
  journal = j-COMP-ARCH-NEWS,
  volume = {11},
  number = {3},
  pages = {132--137},
  month = jun,
  year = {1983},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:53 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Fisher:1983:VLI,
author = "Joseph A. Fisher",
title = "{Very Long Instruction Word} architectures and the
{ELI-512}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "140--150",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tomita:1983:UML,
author = "Shinji Tomita and Kiyoshi Shibayama and Toshiaki
Kitamura and Toshiyuki Nakata and Hiroshi Hagiwara",
title = "A user-microprogrammable, local host computer with
low-level parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "151--157",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gumpertz:1983:CTE,
author = "Richard H. Gumpertz",
title = "Combining tags with error codes",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "160--165",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Park:1983:FDB,
author = "Young Gil Park and Jung Wan Cho",
title = "Fault diagnosis of bit-slice processor",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "166--172",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fiol:1983:LDI,
author = "M. A. Fiol and I. Alegre and J. L. A. Yebra",
title = "Line digraph iterations and the {$(d,k)$} problem for
directed graphs",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "174--177",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Opper:1983:RAR,
author = "Eli Opper and Miroslaw Malek and G. Jack Lipovski",
title = "Resource allocation in rectangular {CC}-banyans",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "178--184",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sovis:1983:UTS,
author = "Franti{\v{s}}ek Sovi{\v{s}}",
title = "Uniform theory of the shuffle-exchange type
permutation networks",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "185--191",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Srini:1983:ACA,
author = "Vason P. Srini and Jorge F. Asenjo",
title = "Analysis of {Cray-1S} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "194--206",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jordan:1983:PMH,
author = "Harry F. Jordan",
title = "Performance measurements on {HEP} --- a pipelined
{MIMD} computer",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "207--212",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Amano:1983:SSM,
author = "Hideharu Amano and Takaichi Yoshida and Hideo Aiso",
title = "{(SM)$^2$} --- {Sparse Matrix Solving Machine}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "213--220",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Krishnan:1983:ESC,
author = "R. Kalyana Krishnan and A. K. Rajasekar and C. S.
Moghe",
title = "An experimental system for {Computer Science}
instruction",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "222--227",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kronlof:1983:ECM,
author = "Klaus Kronl{\"o}f",
title = "Execution control and memory management of a {Data
Flow Signal Processor}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "230--235",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kishi:1983:DDD,
author = "Masasuke Kishi and Hiroshi Yasuhara and Yasusuke
Kawamura",
title = "{DDDP}---a {Distributed Data Driven Processor}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "236--242",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Takahashi:1983:DFP,
author = "Naohisa Takahashi and Makoto Amamiya",
title = "A data flow processor array system: {Design} and
analysis",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "243--250",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pier:1983:RDH,
author = "Kenneth A. Pier",
title = "A retrospective on the {Dorado}, a high-performance
personal computer",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "252--269",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dugan:1983:SEA,
author = "Robert J. Dugan",
title = "{System\slash 370} extended architecture: a program
view of the channel subsystem",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "270--276",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Norton:1983:AIM,
author = "Richard L. Norton and Jacob A. Abraham",
title = "Adaptive interpretation as a means of exploiting
complex instruction sets",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "277--282",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kumar:1983:SSC,
author = "Manoj Kumar and Daniel M. Dias and J. R. Jump",
title = "Switching strategies in a class of packet switching
networks",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "284--300",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wah:1983:CSD,
author = "Benjamin W. Wah",
title = "A comparative study of distributed resource sharing on
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "301--308",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fuchs:1983:CED,
author = "W. Kent Fuchs and Jacob A. Abraham and Kuang-Hua
Huang",
title = "Concurrent error detection in {VLSI} interconnection
networks",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "309--315",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Giloi:1983:HFD,
author = "W. K. Giloi and P. Behr",
title = "Hierarchical function distribution --- a design
principle for advanced multicomputer architectures",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "318--325",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stringa:1983:EIE,
author = "Luigi Stringa",
title = "{EMMA} --- an industrial experience on large
multiprocessing architectures",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "326--333",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Philipson:1983:CSM,
author = "Lars Philipson and Bo Nilsson and Bj{\"o}rn Breidegard",
title = "A communication structure for a multiprocessor
computer with distributed global memory",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "334--340",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hayashi:1983:AHP,
author = "Hiromu Hayashi and Akira Hattori and Haruo Akimoto",
title = "{ALPHA}---a high-performance {LISP} machine equipped
with a new stack structure and garbage collection
system",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "342--348",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Umeyama:1983:PEM,
author = "Shinji Umeyama and Koichiro Tamura",
title = "A parallel execution model of logic programs",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "349--355",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schmittgen:1983:SAC,
author = "Claudia Schmittgen and Werner Kluge",
title = "A system architecture for the concurrent evaluation of
applicative program expressions",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "356--362",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yamaguchi:1983:PEL,
author = "Yoshinori Yamaguchi and Kenji Toda and Toshitsugu
Yuba",
title = "A performance evaluation of a {Lisp}-based data-driven
machine {(EM-3)}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "363--369",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanimoto:1983:PAP,
author = "Steven L. Tanimoto",
title = "A pyramidal approach to parallel processing",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "372--378",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gaillat:1983:DPP,
author = "G{\'e}rard Gaillat",
title = "The design of a parallel processor for image
processing on-board satellites: an application oriented
approach",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "379--386",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nishimura:1983:LPP,
author = "Hitoshi Nishimura and Hiroshi Ohno and Toru Kawata and
Isao Shirakawa and Koichi Omura",
title = "{Links-1} --- a parallel pipelined multimicrocomputer
system for image creation",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "387--394",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ericsson:1983:LSM,
author = "T. Ericsson and P. E. Danielsson",
title = "{LIPP} --- a {SIMD} multiprocessor architecture for
image processing",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "395--400",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Treleaven:1983:NGC,
author = "Philip C. Treleaven",
title = "The new generation of computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "402--409",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Uchida:1983:IMS,
author = "Shunichi Uchida",
title = "Inference machine: {From} sequential to parallel",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "410--416",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moto-oka:1983:OFG,
author = "Tohru Moto-oka",
title = "Overview to the {Fifth Generation Computer System}
project",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "417--422",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Murakami:1983:RDB,
author = "Kunio Murakami and Takeo Kakuta and Nobuyoshi Miyazaki
and Shigeki Shibayama and Haruo Yokota",
title = "A relational data base machine: {First} step to
knowledge base machine",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "423--425",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Arvind:1983:CMN,
author = "Arvind and Robert A. Iannucci",
title = "A critique of multiprocessing {von Neumann} style",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "3",
pages = "426--436",
month = jun,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hill:1983:ACM,
author = "Dwight D. Hill",
title = "An analysis of {C} machine support for other
block-structured languages",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "4",
pages = "6--16",
month = sep,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:10 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Marovac:1983:IID,
author = "Nenad Marovac",
title = "On interprocess interaction in distributed
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "4",
pages = "17--22",
month = sep,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:10 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schalkoff:1983:TED,
author = "Robert J. Schalkoff",
title = "Towards an efficient, dedicated architecture for a
{Digital Geometric Image Transformer (DGIT)}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "4",
pages = "23--29",
month = sep,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:10 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Plotkin:1983:TSA,
author = "Arieh Plotkin and Daniel Tabak",
title = "A {Tree Structured Architecture} for semantic gap
reduction",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "4",
pages = "30--44",
month = sep,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:10 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wilkes:1983:KJI,
author = "Maurice V. Wilkes",
title = "Keeping jump instructions out of the pipeline of a
{RISC}-like computer",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "5",
pages = "5--7",
month = dec,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jones:1983:PM,
author = "Jeremy Jones",
title = "Puzzling with microcode",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "5",
pages = "8--12",
month = dec,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Amsbury:1983:CSA,
author = "Wayne Amsbury",
title = "A code-splitting algorithm",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "5",
pages = "13--21",
month = dec,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dongarra:1983:PVC,
author = "Jack J. Dongarra",
title = "Performance of various computers using standard linear
equations software in a {Fortran} environment",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "5",
pages = "22--27",
month = dec,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhujade:1983:DAC,
author = "M. R. Bhujade",
title = "On the design of {Always Compatible Instruction Set
Architecture (ACISA)}",
journal = j-COMP-ARCH-NEWS,
volume = "11",
number = "5",
pages = "28--30",
month = dec,
year = "1983",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:17 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Heath:1984:RER,
author = "J. L. Heath",
title = "Re-evaluation of the {RISC I}",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "3--10",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Patterson:1984:RW,
author = "David A. Patterson",
title = "{RISC} watch",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "11--19",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Beeler:1984:BBB,
author = "Michael Beeler",
title = "Beyond the {Baskett} benchmark",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "20--31",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Feustel:1984:PEP,
author = "Edward A. Feustel",
title = "Process exchange on the {PR1ME} family of computers",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "32--43",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fenwick:1984:AOA,
author = "P. M. Fenwick",
title = "Addressing operations for automatic data structure
accessing",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "44--57",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuen:1984:SAI,
author = "C. K. Yuen",
title = "Some applications of the implicit register reference",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "58--63",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kavi:1984:AQ,
author = "Krishna M. Kavi and K. Krishnamohan",
title = "Architecture quality",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "1",
pages = "64--72",
month = mar,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agrawal:1984:BHH,
author = "Dharma P. Agrawal and Winser E. Alexander",
title = "{B-HIVE}: a heterogeneous, interconnected, versatile
and expandable multicomputer system",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "2",
pages = "7--13",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burkowski:1984:VAM,
author = "F. J. Burkowski",
title = "A vector and array multiprocessor extension of the
{Sylvan} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "4--11",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kapauan:1984:PPC,
author = "Alejandro Kapauan and J. Timothy Field and Dennis B.
Gannon and Lawrence Snyder",
title = "The {Pringle} parallel computer",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "12--20",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yasrebi:1984:SAS,
author = "Mehrad Yasrebi and G. J. Lipovski",
title = "A state-of-the-art {SIMD} two-dimensional {FFT} array
processor",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "21--27",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ma:1984:ARS,
  author          = {Y. W. Ma and R. Krishnamurti},
  title           = {The architecture of {Replica}: a special-purpose
                     computer system for active multi-sensory perception of
                     $3$-dimensional objects},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {30--37},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Goldwasser:1984:GOD,
  author          = {Samuel M. Goldwasser},
  title           = {A generalized object display processor architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {38--47},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kawakami:1984:SPL,
  author          = {Katsura Kawakami and Shigeo Shimazaki},
  title           = {A special purpose {LSI} processor using the {DDA}
                     algorithm for image transformation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {48--54},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Title: stray comma after "solving" removed; it split the verb from
%%% its object ("solving combinatorial extremum-search problems").
@Article{Wah:1984:SMM,
author = "Benjamin W. Wah and Guo-Jie Li and Chee-Fen Yu",
title = "The status of {MANIP} --- a multicomputer architecture
for solving combinatorial extremum-search problems",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "56--63",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gonzalez-Rubio:1984:SFP,
  author          = {R. Gonzalez-Rubio and J. Rohmer and D. Terral},
  title           = {The {SCHUSS} filter: a processor for non-numerical
                     data processing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {64--73},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ebeling:1984:DIV,
  author          = {Carl Ebeling and Andrew Palay},
  title           = {The design and implementation of a {VLSI} chess move
                     generator},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {74--80},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:1984:PAC,
  author          = {Manjai Lee and Chuan-lin Wu},
  title           = {Performance analysis of circuit switching, baseline
                     interconnection networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {82--90},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kruskal:1984:IBS,
  author          = {Clyde P. Kruskal and Marc Snir},
  title           = {The importance of being square},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {91--98},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chin:1984:CPM,
  author          = {Chi-Yuan Chin and Kai Hwang},
  title           = {Connection principles for multipath, packet switching
                     networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {99--108},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Weiss:1984:IIL,
  author          = {Shlomo Weiss and James E. Smith},
  title           = {Instruction issue logic for pipelined supercomputers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {110--118},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wedig:1984:RBI,
  author          = {Robert G. Wedig and Marc A. Rose},
  title           = {The reduction of branch instruction execution overhead
                     using structured control flow},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {119--125},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Banerjee:1984:FEL,
  author          = {Utpal Banerjee and Daniel D. Gajski},
  title           = {Fast execution of loops with if statements},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {126--132},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gajski:1984:PPR,
  author          = {Daniel Gajski and Won Kim and Shinya Fushimi},
  title           = {A parallel pipelined relational query processor: an
                     architectural overview},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {134--141},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Somani:1984:EVD,
  author          = {Arun K. Somani and Vinod K. Agarwal},
  title           = {An efficient {VLSI} dictionary machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {142--150},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fisher:1984:DMS,
  author          = {Allan L. Fisher},
  title           = {Dictionary machines with a small number of
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {151--156},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hill:1984:EEC,
  author          = {Mark D. Hill and Alan Jay Smith},
  title           = {Experimental evaluation of on-chip microprocessor
                     cache memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {158--166},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Goodman:1984:USC,
  author          = {James R. Goodman and Men-chow Chiang},
  title           = {The use of static column {RAM} as a memory hierarchy},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {167--173},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Haikala:1984:CHRa,
  author          = {I. J. Haikala},
  title           = {Cache hit ratios with geometric task switch
                     intervals},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {175--175},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ishikawa:1984:DOO,
  author          = {Yutaka Ishikawa and Mario Tokoro},
  title           = {The design of an object oriented architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {178--187},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ungar:1984:ASS,
  author          = {David Ungar and Ricki Blau and Peter Foley and Dain
                     Samples and David Patterson},
  title           = {Architecture of {SOAR}: {Smalltalk} on a {RISC}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {188--197},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bose:1984:DIS,
  author          = {Pradip Bose and Edward S. Davidson},
  title           = {Design of instruction set architectures for support of
                     high-level languages},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {198--206},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Quinton:1984:ASS,
  author          = {Patrice Quinton},
  title           = {Automatic synthesis of systolic arrays from uniform
                     recurrent equations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {208--214},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Author: "Nian" is capitalised because BibTeX treats a lowercase word
%%% between given name and surname as a "von" particle, which would make
%%% the surname "nian Zhang" for sorting and labels.
%%% Title: stray comma after "networks" removed.
@Article{Zhang:1984:MDS,
author = "Chang Nian Zhang and David Y. Y. Yun",
title = "Multi-dimensional systolic networks for {Discrete
Fourier Transform}",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "215--222",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fortes:1984:DBL,
  author          = {J. A. B. Fortes and D. I. Moldovan},
  title           = {Data broadcasting in linearly scheduled array
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {224--231},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ramakrishnan:1984:MMM,
  author          = {I. V. Ramakrishnan and P. J. Varman},
  title           = {Modular matrix multiplication on a linear array},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {232--238},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rao:1984:JEE,
  author          = {T. R. N. Rao},
  title           = {Joint encryption and error correction schemes},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {240--241},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bose:1984:UEC,
  author          = {Bella Bose},
  title           = {Unidirectional error correction\slash detection for
                     {VLSI} memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {242--244},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chen:1984:ECC,
  author          = {C. L. Chen},
  title           = {Error-correcting codes for semiconductor memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {245--247},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ghaffar:1984:SEC,
  author          = {Khaled Abdel Ghaffar and Robert J. McEliece},
  title           = {Soft error correction for increased densities in
                     {VLSI} memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {248--250},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Title: stray comma after "memory" removed; it broke the compound
%%% "alpha-particle induced memory error tolerance".
@Article{King:1984:CSA,
author = "Richard M. King and Robert A. Wagner",
title = "Combining speed with alpha-particle induced memory
error tolerance in a large {Boolean} vector machine",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "251--253",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhuyan:1984:PLC,
  author          = {Laxmi N. Bhuyan},
  title           = {On the performance of loosely coupled
                     multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {256--262},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mehrotra:1984:STD,
  author          = {Ravi Mehrotra and Sarosh N. Talukdar},
  title           = {Scheduling of tasks for distributed processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {263--270},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kavi:1984:MRD,
  author          = {Krishna M. Kavi and Edward W. Banios and Bruce D.
                     Shriver},
  title           = {Message repository definitional facility: an
                     architectural model for interprocess communication},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {271--278},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Banerjee:1984:FSA,
  author          = {Prithviraj Banerjee and Jacob A. Abraham},
  title           = {Fault-secure algorithms for multiple-processor
                     systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {279--287},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bic:1984:ELP,
  author          = {Lubomir Bic},
  title           = {Execution of logic programs on a dataflow
                     architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {290--296},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rudd:1984:HPF,
  author          = {W. G. Rudd and Duncan A. Buell and Donald M.
                     Chiarulli},
  title           = {A high performance factoring machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {297--300},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Emer:1984:CPP,
  author          = {Joel S. Emer and Douglas W. Clark},
  title           = {A characterization of processor performance in the
                     {VAX-11\slash 780}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {301--310},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Moeller:1984:PPP,
  author          = {W. D. Moeller and G. Sandweg},
  title           = {The peripheral processor {PP4}, a highly regular
                     {VLSI} processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {312--318},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Philipson:1984:VBD,
  author          = {Lars Philipson},
  title           = {{VLSI} based design principles for {MIMD}
                     multiprocessor computers with distributed memory
                     management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {319--327},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Samatham:1984:MNS,
  author          = {M. R. Samatham and D. K. Pradhan},
  title           = {A multiprocessor network suitable for single-chip
                     {VLSI} implementation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {328--339},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rudolph:1984:DDC,
  author          = {Larry Rudolph and Zary Segall},
  title           = {Dynamic decentralized cache schemes for {MIMD}
                     parallel processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {340--347},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Papamarcos:1984:LOC,
  author          = {Mark S. Papamarcos and Janak H. Patel},
  title           = {A low-overhead coherence solution for multiprocessors
                     with private cache memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {348--354},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Author: the second author's given name is hyphenated ("Jean-Loup"),
%%% so styles that abbreviate given names produce "J.-L. Baer".
@Article{Archibald:1984:ESC,
author = "James Archibald and Jean-Loup Baer",
title = "An economical solution to the cache coherence
problem",
journal = j-COMP-ARCH-NEWS,
volume = "12",
number = "3",
pages = "355--362",
month = jun,
year = "1984",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Haikala:1984:CHRb,
  author          = {Ilkka J. Haikala},
  title           = {Cache hit ratios with geometric task switch
                     intervals},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {3},
  pages           = {364--371},
  month           = jun,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chesley:1984:WM,
  author          = {Gilman D. Chesley},
  title           = {A wafer microcomputer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {4},
  pages           = {4--6},
  month           = sep,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Siegel:1984:PRP,
  author          = {Howard Jay Siegel and Thomas Schwederski and Nathaniel
                     J. {Davis IV} and James T. Kuehn},
  title           = {{PASM}: a reconfigurable parallel system for image
                     processing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {4},
  pages           = {7--19},
  month           = sep,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Aslam:1984:MDC,
  author          = {Javaid Aslam},
  title           = {Methodology for designing a computer architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {5},
  pages           = {4--11},
  month           = dec,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Graham:1984:PAS,
  author          = {Peter C. J. Graham},
  title           = {Providing architectural support for expert systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {12},
  number          = {5},
  pages           = {12--18},
  month           = dec,
  year            = {1984},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dongarra:1985:PVC,
  author          = {Jack J. Dongarra},
  title           = {Performance of various computers using standard linear
                     equations software in a {Fortran} environment},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {1},
  pages           = {3--11},
  month           = mar,
  year            = {1985},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296930.1296931},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:22 MDT 2008},
  bibsource       = {ftp://ftp.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This note compares the performance of different
                     computer systems while solving dense systems of linear
                     equations using the LINPACK software in a Fortran
                     environment. About 100 computers, ranging from a CRAY
                     X-MP to the 68000 based systems such as the Apollo and
                     SUN Workstations to IBM PC's, are compared.},
  acknowledgement = ack-nhfb,
  classcodes      = {C4140 (Linear algebra); C5470 (Performance evaluation
                     and testing); C7310 (Mathematics computing)},
  corpsource      = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
                     USA},
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {68000 based; Apollo workstations; Cray X-MP; dense
                     systems; evaluation; FORTRAN environment; IBM PCs;
                     linear algebra; linear equations; LINPACK; performance;
                     performance comparison; performance evaluation;
                     software; Sun Workstations; systems},
  treatment       = {X Experimental},
}
@Article{Hor:1985:DPP,
  author          = {T. M. Hor and C. K. Yuen},
  title           = {The design and programming of a powerful short
                     wordlength processor using context-dependent machine
                     instructions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {1},
  pages           = {12--26},
  month           = mar,
  year            = {1985},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296930.1296932},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:22 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Context-dependent machine instructions were used to
                     extend the capability of instruction set of a short
                     wordlength processor. By freeing instruction bits for
                     other purposes, a more powerful machine instruction set
                     can be designed. Programming examples were given to
                     illustrate the benefit obtained from the design. Less
                     CPU time and memory space were required as compared
                     with popular 8-bit CPUs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Miya:1985:MDP,
  author          = {E. N. Miya},
  title           = {Multiprocessor\slash distributed processing
                     bibliography (in machine-readable form)},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {1},
  pages           = {27--29},
  month           = mar,
  year            = {1985},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296930.1296933},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:22 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {There is a lot of renewed interest in parallel
                     processing. People parallel process, too. Human
                     parallel processing tends to be cooperative rather than
                     competitive. To this end, research literature uses
                     bibliographies like road-maps to the field.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {annotated bibliography; cellular automata; computer
                     system architecture; fault-tolerant computers;
                     multicomputers; multiprocessor software; networks;
                     operating systems; parallel algorithms; parallel
                     processing; programming languages; supercomputers;
                     vector processing},
}
@Article{Hu:1985:DAE,
  author          = {Weiming Hu},
  title           = {Dataflow architecture for {EEG} patient monitor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {2},
  pages           = {3--10},
  month           = jun,
  year            = {1985},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1296935.1296936},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Much work is currently directed towards dataflow
                     architectures. Most of the proposed architectures
                     attempt to exploit fine grained parallelism. This paper
                     describes an application specific dataflow architecture
                     which exploits coarse grained parallelism. The
                     application is that of a real-time patient monitor used
                     to display patient data.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tagg:1985:SEA,
author = "A. G. Tagg",
title = "Speculations on the evolution of an architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "2",
pages = "11--18",
month = jun,
year = "1985",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1296935.1296937",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 12:06:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "PRIME computers was formed in the early 1970s by a
splinter group of hardware and software engineers from
Honeywell. With them, they brought their ideas on
minicomputers, based on their experience of Honeywell
minis, and their experience of the MULTICS operating
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Randell:1985:HST,
author = "Brian Randell",
title = "Hardware\slash software tradeoffs: a general design
principle?",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "2",
pages = "19--21",
month = jun,
year = "1985",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1296935.1296938",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 12:06:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware and software are logically equivalent. Any
operation performed by software can also be built
directly into the hardware and any instruction executed
by the hardware can also be simulated in software. The
decision to put certain features in hardware and others
in software is based on such factors as cost, speed,
reliability and frequency of change. There are no hard
and fast rules to the effect that X must go into the
hardware and Y must be programmed explicitly. Designers
with different goals may, and often do, make different
decisions\ldots{} the boundary between hardware and
software is arbitrary and constantly changing. Today's
software is tomorrow's hardware, and vice versa. [1]",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kumar:1985:APM,
author = "V. K. Prasanna Kumar and C. S. Raghavendra",
title = "Array processor with multiple broadcasting",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "2--10",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wolf:1985:MMI,
author = "G. Wolf and J. R. Jump",
title = "Matrix multiplication in an interleaved array
processing architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "11--17",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodman:1985:PVD,
author = "J. R. Goodman and Jian-tu Hsieh and Koujuch Liou and
Andrew R. Pleszkun and P. B. Schechter and Honesty C.
Young",
title = "{PIPE}: a {VLSI} decoupled architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "20--27",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1985:TST,
author = "Peter Y. T. Hsu and Joseph T. Rahmeh and Edward S.
Davidson and Jacob A. Abraham",
title = "{TIDBITS}: speedup via time-delay bit-slicing in {ALU}
design for {VLSI} technology",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "29--35",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1985:IPI,
author = "James E. Smith and Andrew R. Pleszkun",
title = "Implementation of precise interrupts in pipelined
processors",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "36--44",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schwetman:1985:CPP,
author = "Herb Schwetman and Daniel Gajski and Dennis Gannon and
Daniel Hills and Jacob Schwartz and James Browne",
title = "Classification of parallel processor architectures
(invited tutorial session)",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "45--45",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hasegawa:1985:HST,
author = "Makoto Hasegawa and Yoshiharu Shigei",
title = "High-speed top-of-stack scheme for {VLSI} processor: a
management algorithm and its analysis",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "48--54",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hitchcock:1985:AMR,
author = "Charles Y. {Hitchcock III} and H. M. Brinkley Sprunt",
title = "Analyzing multiple register sets",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "55--63",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1985:CEI,
author = "Alan Jay Smith",
title = "Cache evaluation and the impact of workload choice",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "64--73",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moon:1985:AS,
author = "David A. Moon",
title = "Architecture of the {Symbolics 3600}",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "76--83",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ram:1985:PGC,
author = "Ashwin Ram and Janak H. Patel",
title = "Parallel garbage collection without synchronization
overhead",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "84--90",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sohi:1985:ELE,
author = "Gurindar S. Sohi and Edward S. Davidson and Janak H.
Patel",
title = "An efficient {LISP}-execution architecture with a new
representation for list structures",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "91--98",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Amano:1985:SIN,
author = "Hideharu Amano and Taisuke Boku and Tomohiro Kudoh and
Hideo Aiso",
title = "{(SM)$^2$-II}: a new version of the sparse matrix solving
machine",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "100--107",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Beetem:1985:GS,
author = "John Beetem and Monty Denneau and Don Weingarten",
title = "The {GF11} supercomputer",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "108--115",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1985:MUD,
author = "Bradley Warren Smith and Howard Jay Siegel",
title = "Models for use in the design of macro-pipelined
parallel processors",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "116--123",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Edler:1985:IRM,
author = "Jan Edler and Allan Gottlieb and Clyde P. Kruskal and
Kevin P. McAuliffe and Larry Rudolph and Marc Snir and
Patricia J. Teller and James Wilson",
title = "Issues related to {MIMD} shared-memory computers: the
{NYU Ultracomputer} approach",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "126--135",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ibbett:1985:MPV,
author = "R. N. Ibbett and P. C. Capon and N. P. Topham",
title = "{MU6V}: a parallel vector processing system",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "136--144",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lundstrom:1985:DCH,
author = "Stephen F. Lundstrom",
title = "A decentralized control, highly concurrent
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "145--151",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dally:1985:OOA,
author = "William J. Dally and James T. Kajiya",
title = "An object oriented architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "154--161",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gehringer:1985:TAH,
author = "Edward F. Gehringer and J. Leslie Keedy",
title = "Tagged architecture: how compelling are its
advantages?",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "162--170",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nanba:1985:VAV,
author = "S. Nanba and N. Ohno and H. Kubo and H. Morisue and T.
Ohshima and H. Yamagishi",
title = "{VM\slash 4}: {ACOS-4} virtual machine architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "171--178",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dobry:1985:PSP,
author = "T. P. Dobry and A. M. Despain and Y. N. Patt",
title = "Performance studies of a {Prolog} machine
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "180--190",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nakazaki:1985:DHS,
author = "Ryosei Nakazaki and Akihiko Konagaya and Shin'ichi
Habata and Hideo Shimazu and Mamoru Umemura and
Masahiro Yamamoto and Minoru Yokota and Takashi
Chikayama",
title = "Design of a high-speed {Prolog} machine {(HPM)}",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "191--197",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Woo:1985:HUU,
author = "Nam Sung Woo",
title = "A hardware unification unit: design and analysis",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "198--205",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matelan:1985:FM,
author = "Nicholas Matelan",
title = "The {FLEX\slash 32} multicomputer",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "209--213",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rattner:1985:CMT,
author = "J. Rattner",
title = "Commercial multiprocessors (title only)",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "214--214",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Naedel:1985:CCA,
author = "Dick Naedel",
title = "Closely coupled asynchronous hierarchical and parallel
processing in an open architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "215--220",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Savage:1985:PPL,
author = "Jim Savage",
title = "Parallel processing as a language design problem",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "221--224",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rodgers:1985:IMS,
author = "David P. Rodgers",
title = "Improvements in multiprocessor system design",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "225--231",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mark:1985:SCF,
author = "Peter B. Mark",
title = "The {Sequoia} computer: a fault-tolerant
tightly-coupled multiprocessor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "232--232",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nestle:1985:SNS,
author = "Elliot Nestle and Armond Inselberg",
title = "The {SYNAPSE N+1 System}: architectural
characteristics and performance data of a
tightly-coupled multiprocessor system",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "233--239",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Horst:1985:AHV,
author = "Robert W. Horst and Timothy C. K. Chou",
title = "An architecture for high volume transaction
processing",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "240--245",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stone:1985:FGC,
author = "Harold Stone and Eric Manning and Harriet Rigas and
Philip Treleaven",
title = "The fifth generation computer systems projects
(invited session)",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "247--247",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kamiya:1985:HPA,
author = "Shigeo Kamiya and Susumu Matsuda and Kazuhide Iwata
and Shigeki Shibayama and Hiroshi Sakai and Kunio
Murakami",
title = "A hardware pipeline algorithm for relational database
operation",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "250--257",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1985:DMR,
author = "Dik Lun Lee",
title = "A distributed multiple-response resolver for
value-order retrieval",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "258--265",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Feo:1985:DDR,
author = "John Feo and Roy Jenevein and J. C. Browne",
title = "Dynamic, distributed resource configuration on
{SW}-banyans",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "268--275",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Katz:1985:ICC,
author = "R. H. Katz and S. J. Eggers and D. A. Wood and C. L.
Perkins and R. G. Sheldon",
title = "Implementing a cache consistency protocol",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "276--283",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Li:1985:TRS,
author = "Zhiyuan Li and Walid Abu-Sufah",
title = "A technique for reducing synchronization overhead in
large scale multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "284--291",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Whitby-Strevens:1985:T,
author = "Colin Whitby-Strevens",
title = "The transputer",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "292--300",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hurson:1985:SMU,
author = "A. R. Hurson and B. Shirazi",
title = "A systolic multiplier unit and its {VLSI} design",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "302--309",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Melhem:1985:LSS,
author = "Rami Melhem",
title = "A language for the simulation of systolic
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "310--314",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chuang:1985:VSA,
author = "Henry Y. H. Chuang and Guo He",
title = "A versatile systolic array for matrix computations",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "315--322",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vedder:1985:HDF,
author = "Rex Vedder and Dennis Finn",
title = "The {Hughes Data Flow Multiprocessor}: architecture
for efficient signal and data processing",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "324--332",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Traub:1985:APG,
author = "Kenneth R. Traub",
title = "An abstract parallel graph reduction machine",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "333--341",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Preiss:1985:DFQ,
author = "Bruno R. Preiss and V. C. Hamacher",
title = "Data flow on a queue machine",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "342--351",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gaudiot:1985:MHS,
author = "J. L. Gaudiot",
title = "Methods for handling structures in data-flow systems",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "352--358",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Samatham:1985:BMN,
author = "M. R. Samatham and D. K. Pradhan",
title = "The {de Bruijn} multiprocessor network: a versatile
sorting network",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "360--367",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tzeng:1985:FTS,
author = "Nian-Feng Tzeng and Pen-Chung Yew and Chun-Qi Zhu",
title = "A fault-tolerant scheme for multistage interconnection
networks",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "368--375",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kumar:1985:DAF,
author = "V. P. Kumar and S. M. Reddy",
title = "Design and analysis of fault-tolerant multistage
interconnection networks with low link complexity",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "376--386",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Davis:1985:PAP,
author = "Nathaniel J. {Davis IV} and Howard Jay Siegel",
title = "The performance analysis of partitioned circuit
switched multistage interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "387--394",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vrsalovic:1985:IPD,
author = "Dalibor Vrsalovic and Edward F. Gehringer and Zary Z.
Segall and Daniel P. Siewiorek",
title = "The influence of parallel decomposition strategies on
the performance of multiprocessor systems",
journal = j-COMP-ARCH-NEWS,
volume = "13",
number = "3",
pages = "396--405",
month = jun,
year = "1985",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:54 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Abu-Sufah:1985:PPT,
  author          = {Walid Abu-Sufah and Alex Y. Kwok},
  title           = {Performance prediction tools for {Cedar}: a multiprocessor supercomputer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {3},
  pages           = {406--413},
  month           = jun,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Grino:1985:ASM,
  author          = {Jos{\'e} M. Llaber{\'\i}a Gri{\~n}{\'o} and Mateo Valero Cort{\'e}s and Enrique Herrada Lillo and Jes{\'u}s Labarta Mancho},
  title           = {Analysis and simulation of multiplexed single-bus networks with and without buffering},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {3},
  pages           = {414--421},
  month           = jun,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sanguinetti:1985:PMB,
  author          = {J. Sanguinetti and B. Kumar},
  title           = {Performance of a message-based multiprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {3},
  pages           = {424--425},
  month           = jun,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:54 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hake:1985:PDP,
  author          = {J.-Fr. Hake},
  title           = {{PDOC} --- a database on parallel processing literature},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {4},
  pages           = {2--7},
  month           = sep,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rockey:1985:DAS,
  author          = {Mark Rockey},
  title           = {The dataflow architecture: a suitable base for the implementation of expert systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {4},
  pages           = {8--14},
  month           = sep,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cragon:1985:ADS,
  author          = {Harvey G. Cragon},
  title           = {An architecture design system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {4},
  pages           = {15--21},
  month           = sep,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Huguet:1985:RRF,
  author          = {Miquel Huguet and Tom{\'a}s Lang},
  title           = {A reduced register file for {RISC} architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {4},
  pages           = {22--31},
  month           = sep,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:10 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Alexander:1985:TBP,
  author          = {Cedell A. Alexander and William M. Keshlear and Faye Briggs},
  title           = {Translation buffer performance in a {UNIX} environment},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {5},
  pages           = {2--14},
  month           = dec,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:1985:HSC,
  author          = {Rosanna Lee},
  title           = {On ``hot spot'' contention},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {13},
  number          = {5},
  pages           = {15--20},
  month           = dec,
  year            = {1985},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Woo:1986:CHU,
  author          = {Nam Sung Woo and Richard O'Keefe},
  title           = {A comment on {``A hardware unification unit: design and analysis''}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {1},
  pages           = {2--3},
  month           = jan,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ruighaver:1986:DAD,
  author          = {A. B. Ruighaver},
  title           = {Design aspects of the {Delft Parallel Processor DPP84} and its programming system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {1},
  pages           = {4--8},
  month           = jan,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hammerstrom:1986:CAP,
  author          = {Dan Hammerstrom and David Maier and Shreekant Thakkar},
  title           = {The {Cognitive Architecture Project}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {1},
  pages           = {9--21},
  month           = jan,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Smith:1986:BRC,
  author          = {Alan Jay Smith},
  title           = {Bibliography and reading on {CPU} cache memories and related topics},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {1},
  pages           = {22--42},
  month           = jan,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:29 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Yokota:1986:MAR,
  author          = {H. Yokota and H. Itoh},
  title           = {A model and an architecture for a relational knowledge base},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {2--9},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Amamiya:1986:IEL,
  author          = {M. Amamiya and M. Takesue and R. Hasegawa and H. Mikami},
  title           = {Implementation and evaluation of a list-processing-oriented data flow machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {10--19},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Takahashi:1986:NSS,
  author          = {K. Takahashi and H. Yamada and H. Nagai and K. Matsumi},
  title           = {A new string search hardware architecture for {VLSI}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {20--27},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gupta:1986:PAA,
  author          = {A. Gupta and C. Forgy and A. Newell and R. Wedig},
  title           = {Parallel algorithms and architectures for rule-based systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {28--37},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Halstead:1986:CDM,
  author          = {R. R. {Halstead, Jr.} and T. L. Anderson and R. B. Osborne and T. L. Sterling},
  title           = {{Concert}: design of a multiprocessor development system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {40--48},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kung:1986:MRB,
  author          = {H. T. Kung},
  title           = {Memory requirements for balanced computer architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {49--54},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hong:1986:GAS,
  author          = {Y. C. Hong and T. H. Payne and L. B. O. Ferguson},
  title           = {Graph allocation in static dataflow systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {55--64},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Agrawal:1986:SIR,
  author          = {P. Agrawal and R. Agrawal},
  title           = {Software implementation of a recursive fault tolerance algorithm on a network of computers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {65--72},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nojiri:1986:MPO,
  author          = {T. Nojiri and S. Kawasaki and K. Sakoda},
  title           = {Microprogrammable processor for object-oriented architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {74--81},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thakkar:1986:IFU,
  author          = {S. S. Thakkar and W. E. Hostmann},
  title           = {An instruction fetch unit for a graph reduction machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {82--91},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gehringer:1986:FOO,
  author          = {E. F. Gehringer and R. P. Colwell},
  title           = {Fast object-oriented procedure calls: lessons from the {Intel 432}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {92--101},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dias:1986:CMS,
  author          = {D. M. Dias and B. R. Iyer and P. S. Yu},
  title           = {On coupling many small systems for transaction processing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {104--110},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Malkawi:1986:PMP,
  author          = {M. I. Malkawi and J. H. Patel},
  title           = {Performance measurement of paging behavior in multiprogramming systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {111--118},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Agarwal:1986:ANT,
  author          = {A. Agarwal and R. L. Sites and M. Horowitz},
  title           = {{ATUM}: a new technique for capturing address traces using microcode},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {119--127},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wise:1986:EES,
  author          = {M. J. Wise},
  title           = {Experimenting with {EPILOG}: some results and preliminary conclusions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {130--139},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Page range corrected from 119--127, which duplicated that of the preceding paper in this issue (Agarwal:1986:ANT). The value 130--139 should be confirmed against the proceedings table of contents.},
}
@Article{Shobatake:1986:UPB,
  author          = {Y. Shobatake and H. Aiso},
  title           = {A unification processor based on a uniformly structured cellular hardware},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {140--148},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Page range corrected from 128--139, which left pages 140--148 unaccounted for before the next paper (Ito:1986:APE, starting on page 149) and no room for Wise:1986:EES after page 127. The value 140--148 should be confirmed against the proceedings table of contents.},
}
@Article{Ito:1986:APE,
  author          = {N. Ito and M. Sato and E. Kuno and K. Rokusawa},
  title           = {The architecture and preliminary evaluation results of the experimental parallel inference machine {PIM-D}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {149--156},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Seznec:1986:ERC,
  author          = {A. Seznec},
  title           = {An efficient routing control for the {SIGMA} network {$ \Sigma (4) $}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {158--168},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nicoud:1986:RHP,
  author          = {J. D. Nicoud and K. Skala},
  title           = {{REYSM}, a high performance, low power multi-processor bus},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {169--174},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:1986:ESG,
  author          = {K. Y. Lee and W. Hegazy},
  title           = {The extra stage gamma network},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {175--182},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Yuhara:1986:EFA,
  author          = {M. Yuhara and A. Hattori and M. Niwa and M. Kishimoto and H. Hayashi},
  title           = {Evaluation of the {FACOM ALPHA Lisp} machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {184--190},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Pleszkun:1986:AEL,
  author          = {A. R. Pleszkun and M. J. Thazhuthaveetil},
  title           = {An architecture for efficient {Lisp} list access},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {191--198},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nakata:1986:FLS,
  author          = {T. Nakata and N. Koike},
  title           = {A functional level simulation engine of {MAN-YO}: a special purpose parallel machine for logic design automation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {202--208},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Frank:1986:EPS,
  author          = {E. H. Frank},
  title           = {Exploiting parallelism in a switch-level simulation machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {209--215},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Anantharaman:1986:HAS,
  author          = {T. S. Anantharaman and R. Bisiani},
  title           = {A hardware accelerator for speech recognition algorithms},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {216--223},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Shimada:1986:EPD,
  author          = {T. Shimada and K. Hiraki and K. Nishida and S. Sekiguchi},
  title           = {Evaluation of a prototype data flow processor of the {SIGMA-1} for scientific computations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {226--234},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sargeant:1986:SDS,
  author          = {J. Sargeant and C. C. Kirkham},
  title           = {Stored data structures on the {Manchester} dataflow machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {235--242},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hawakami:1986:SDS,
  author          = {K. Kawakami and J. R. Gurd},
  title           = {A scalable dataflow structure store},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {243--250},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {First author's surname corrected from Hawakami to Kawakami. The citation key is deliberately left unchanged so that existing citations continue to resolve.},
}
@Article{Hasegawa:1986:FFT,
  author          = {M. Hasegawa and Y. Shigei},
  title           = {{$ A T^2 = O(N \log^4 N), T = O(\log N) $} {Fast Fourier Transform} in a light connected $3$-dimensional {VLSI}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {252--260},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sapiecha:1986:MAH,
  author          = {K. Sapiecha and R. Jarocki},
  title           = {Modular architecture for high performance implementation of {FFT} algorithm},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {261--270},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Navarro:1986:CSI,
  author          = {J. J. Navarro and J. M. Llaberia and M. Valero},
  title           = {Computing size-independent matrix problems on systolic array processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {271--278},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tomita:1986:CLL,
  author          = {S. Tomita and K. Shibayama and T. Nakata and S. Yuasa and H. Hagiwara},
  title           = {A computer with low-level parallelism {QA-2}: its applications to {$3$-D} graphics and {Prolog\slash Lisp} machines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {280--289},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hirayama:1986:VOA,
  author          = {M. Hirayama},
  title           = {{VLSI} oriented asynchronous architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {290--296},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hwu:1986:HHP,
  author          = {W. Hwu and Y. N. Patt},
  title           = {{HPSm}, a high performance restricted data flow architecture having minimal functionality},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {297--306},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Onaga:1986:DRA,
  author          = {K. Onaga and T. Takechi},
  title           = {On design of rotary array communication and wavefront-driven algorithms for solving large-scale band-limited matrix equations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {308--315},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Napolitano:1986:CAD,
  author          = {L. M. {Napolitano, Jr.}},
  title           = {A computer architecture for dynamic finite element analysis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {316--323},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Harper:1986:PEV,
  author          = {D. T. {Harper III} and J. R. Jump},
  title           = {Performance evaluation of vector accesses in parallel memories using a skewed storage scheme},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {324--328},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kondo:1986:PMA,
  author          = {T. Kondo and T. Tsuchiya and T. Kitamura and Y. Sugiyama and T. Kimura},
  title           = {Pseudo {MIMD} array processor---{AAP2}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {330--337},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fisher:1986:SLA,
  author          = {A. L. Fisher},
  title           = {Scan line array processors for image computation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {338--345},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Annaratone:1986:WAI,
  author          = {M. Annaratone and E. Arnould and T. Gross and H. T. Kung and M. S. Lam},
  title           = {{Warp} architecture and implementation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {346--356},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wood:1986:CAT,
  author          = {D. A. Wood and S. J. Eggers and G. Gibson and M. D. Hill and J. M. Pendleton},
  title           = {An in-cache address translation mechanism},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {358--365},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cheriton:1986:SCC,
  author          = {D. R. Cheriton and G. A. Slavenburg and P. D. Boyle},
  title           = {Software-controlled caches in the {VMP} multiprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {366--374},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Goodman:1986:URV,
  author          = {J. R. Goodman and W. C. Hsu},
  title           = {On the use of registers vs. cache to minimize memory traffic},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {375--383},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hsu:1986:HCS,
  author          = {P. Y. T. Hsu and E. S. Davidson},
  title           = {Highly concurrent scalar processing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {386--395},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{McFarling:1986:RCB,
  author          = {S. McFarling and J. Hennessy},
  title           = {Reducing the cost of branches},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {396--403},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kunkel:1986:OPS,
  author          = {S. R. Kunkel and J. E. Smith},
  title           = {Optimal pipelining in supercomputers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {404--411},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sweazey:1986:CCC,
  author          = {P. Sweazey and A. J. Smith},
  title           = {A class of compatible cache consistency protocols and
                     their support by the {IEEE Futurebus}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {414--423},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bitar:1986:MCS,
  author          = {P. Bitar and A. M. Despain},
  title           = {Multiprocessor cache synchronization: issues,
                     innovations, evolution},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {424--433},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dubois:1986:MAB,
  author          = {M. Dubois and C. Scheurich and F. Briggs},
  title           = {Memory access buffering in multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {434--442},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Taylor:1986:ESL,
  author          = {G. S. Taylor and P. N. Hilfinger and J. R. Larus and
                     D. A. Patterson and B. G. Zorn},
  title           = {Evaluation of the {SPUR Lisp} architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {2},
  pages           = {444--452},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Woo:1986:RCC,
  author          = {Nam Sung Woo},
  title           = {A reply to comments {``A Comment on `A Hardware
                     Unification Unit: Design and Analysis'\,''}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {2--4},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DuBose:1986:MR,
  author          = {D. K. DuBose and D. K. Fotakis and D. Tabak},
  title           = {A microcoded {RISC}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {5--16},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lang:1986:RRS,
  author          = {Tom{\'a}s Lang and Miquel Huguet},
  title           = {Reduced register saving\slash restoring in
                     single-window register files},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {17--26},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rouse:1986:TDH,
  author          = {Larry O'Neal Rouse},
  title           = {The twisted double helix: a minimum distance
                     architecture for 5th generation computing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {27--33},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Harland:1986:RMT,
  author          = {David M. Harland},
  title           = {A recursively microcodable tagged architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {34--40},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Alexander:1986:CMP,
  author          = {Cedell Alexander and William Keshlear and
                     Furrokh Cooper and Faye Briggs},
  title           = {Cache memory performance in a {Unix} environment},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {3},
  pages           = {41--61},
  month           = jun,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:55 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stokes:1986:THV,
  author          = {Roger Stokes},
  title           = {Traces for hardware verification},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {4},
  pages           = {7--14},
  month           = sep,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:11 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kirner:1986:DDS,
  author          = {Claudio Kirner and Eduardo Marques},
  title           = {Design of a distributed system support based on a
                     centralized parallel bus},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {4},
  pages           = {15--26},
  month           = sep,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:11 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Irwin:1986:STR,
  author          = {Mary Jane Irwin},
  title           = {Secretary\slash Treasurer's {Report}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {4},
  pages           = {28--28},
  month           = sep,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:11 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Harland:1986:MOO,
  author          = {David M. Harland and Bruno Beloff},
  title           = {Microcoding an object-oriented instruction set},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {5},
  pages           = {3--12},
  month           = dec,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stallings:1986:ABR,
  author          = {William Stallings},
  title           = {An annotated bibliography on reduced instruction set
                     computers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {14},
  number          = {5},
  pages           = {13--19},
  month           = dec,
  year            = {1986},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:18 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Halstead:1987:OCM,
  author          = {Robert H. {Halstead, Jr.}},
  title           = {Overview of {Concert MultiLisp}: a multiprocessor
                     symbolic computing system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {5--14},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Patterson:1987:PRS,
  author          = {Dave Patterson},
  title           = {A progress report on {SPUR}: {February 1, 1987}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {15--21},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Despain:1987:A,
  author          = {A. Despain and Y. Patt and V. Srini and P. Bitar and
                     W. Bush and C. Chien and W. Citrin and B. Fagin and
                     W. Hwu and S. Melvin and R. McGeer and A. Singhal and
                     M. Shebanow and P. {Van Roy}},
  title           = {Aquarius},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {22--34},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kohli:1987:OPP,
  author          = {Madhur Kohli and Mark E. Giuliano and Jack Minker},
  title           = {An overview of the {PRISM} project},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {35--42},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hermenegildo:1987:DHP,
  author          = {M. V. Hermenegildo and R. A. Warren},
  title           = {Designing a high performance parallel logic
                     programming system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {43--52},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mills:1987:CGR,
  author          = {Jonathan W. Mills},
  title           = {Coming to grips with a {RISC}: a report of the
                     progress of the {LOW RISC} design group},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {53--62},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Short:1987:UIS,
  author          = {Brian Short},
  title           = {Use of instruction set simulators to evaluate the
                     {LOW RISC}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {63--67},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gutzmann:1987:ODH,
  author          = {Kurt M. Gutzmann},
  title           = {Optimal dimension of hypercubes for sorting},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {68--72},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chesley:1987:AWN,
  author          = {Gilman Chesley},
  title           = {Addressable {WSI}: a non-redundant approach},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {73--80},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Biswas:1987:CCS,
  author          = {Nripendra N. Biswas and S. Srinivas and
                     Trishala Dharanendra},
  title           = {A centrally controlled shuffle network for
                     reconfigurable and fault-tolerant architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {1},
  pages           = {81--87},
  month           = mar,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:31 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ditzel:1987:BFC,
  author          = {D. R. Ditzel and H. R. McLellan},
  title           = {Branch folding in the {CRISP} microprocessor: reducing
                     branch delay to zero},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {2--8},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DeRosa:1987:EBA,
  author          = {J. A. DeRosa and H. M. Levy},
  title           = {An evaluation of branch architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {10--16},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hwu:1987:CRO,
  author          = {W. W. Hwu and Y. N. Patt},
  title           = {Checkpoint repair for out-of-order execution machines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {18--26},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sohi:1987:IIL,
  author          = {G. S. Sohi and S. Vajapeyam},
  title           = {Instruction issue logic for high-performance,
                     interruptible pipelined processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {27--34},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Swensen:1987:FTS,
  author          = {J. Swensen and Y. Patt},
  title           = {Fast temporary storage for serial and parallel
                     execution},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {35--43},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wong:1987:PAD,
  author          = {K. Wong and M. A. Franklin},
  title           = {Performance analysis and design of a logic simulation
                     machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {46--55},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Doshi:1987:MSA,
  author          = {K. Doshi and P. Varman},
  title           = {A modular systolic architecture for image
                     convolutions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {56--63},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fujita:1987:TMA,
  author          = {S. Fujita and R. Aibara and M. Yamashita and T. Ae},
  title           = {A template matching algorithm using
                     optically-connected {$3$-D} {VLSI} architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {64--70},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mendelson:1987:MDF,
  author          = {B. Mendelson and G. M. Silberman},
  title           = {Mapping data flow programs on a {VLSI} array of
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {72--80},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ghosal:1987:AMA,
  author          = {D. Ghosal and L. N. Bhuyan},
  title           = {Analytical modeling and architectural modifications of
                     a dataflow computer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {81--89},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Takesue:1987:URM,
  author          = {M. Takesue},
  title           = {A unified resource management and execution control
                     mechanism for data flow machines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {90--97},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Abe:1987:HPI,
  author          = {S. Abe and T. Bandoh and S. Yamaguchi and
                     K. Kurosawa and K. Kiriyama},
  title           = {High performance integrated {Prolog} processor {IPP}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {100--107},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fagin:1987:PSP,
  author          = {B. S. Fagin and A. M. Despain},
  title           = {Performance studies of a parallel {Prolog}
                     architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {108--116},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Civera:1987:EVP,
  author          = {P. L. Civera and F. Maddaleno and G. L. Piccinini and
                     M. Zamboni},
  title           = {An experimental {VLSI} {Prolog} interpreter:
                     preliminary measurements and results},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {117--126},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ridoux:1987:DSM,
  author          = {O. Ridoux},
  title           = {Deterministic and stochastic modeling of parallel
                     garbage collection: towards real-time criteria},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {128--136},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sun:1987:SEP,
  author          = {C. Sun and Y. Tsu},
  title           = {The sharing of environment in {AND--OR}-parallel
                     execution of logic programs},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {137--144},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Guha:1987:AID,
  author          = {A. Guha and R. Ramnarayan and M. Derstine},
  title           = {Architectural issues in designing symbolic processors
                     in optics},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {145--151},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Varma:1987:RMS,
  author          = {A. Varma and C. S. Raghavendra},
  title           = {Rearrangeability of multistage shuffle\slash exchange
                     networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {154--162},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Beivide:1987:OMC,
  author          = {R. Beivide and E. Herrada and J. L. Balc{\'a}zar and
                     J. Labarta},
  title           = {Optimized mesh-connected networks for {SIMD} and
                     {MIMD} architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {163--170},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Harper:1987:PER,
  author          = {D. T. {Harper III} and J. R. Jump},
  title           = {Performance evaluation of reduced bandwidth multistage
                     interconnection networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {171--175},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ramachandran:1987:HSI,
  author          = {U. Ramachandran and M. Solomon and M. Vernon},
  title           = {Hardware support for interprocess communication},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {178--188},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dally:1987:AMD,
  author          = {W. J. Dally and L. Chao and A. Chien and S. Hassoun and
                     W. Horwat and J. Kaplan and P. Song and B. Totty and
                     S. Wills},
  title           = {Architecture of a message-driven processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {189--196},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kumar:1987:ESA,
  author          = {M. Kumar},
  title           = {Effect of storage allocation\slash reclamation methods
                     on parallelism and storage requirements},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {197--205},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chang:1987:CDS,
  author          = {J. H. Chang and H. Chao and K. So},
  title           = {Cache design of a sub-micron {CMOS}
                     {System\slash 370}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {208--213},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Freeman:1987:APM,
  author          = {M. Freeman},
  title           = {An architectural perspective on a memory access
                     controller},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {214--223},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cheung:1987:OAG,
  author          = {K. Cheung and G. Sohi and K. Saluja and D. Pradhan},
  title           = {Organization and analysis of a gracefully-degrading
                     interleaved memory system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {224--231},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Scheurich:1987:CMO,
  author          = {C. Scheurich and M. Dubois},
  title           = {Correct memory operation of cache-based
                     multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {234--243},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wilson:1987:HCB,
  author          = {A. W. {Wilson, Jr.}},
  title           = {Hierarchical cache\slash bus architecture for shared
                     memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {244--252},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:1987:MCD,
  author          = {R. L. Lee and P. C. Yew and D. H. Lawrie},
  title           = {Multiprocessor cache design considerations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {15},
  number          = {2},
  pages           = {253--262},
  month           = jun,
  year            = {1987},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 16:49:40 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Eickemeyer:1987:PEM,
author = "R. J. Eickemeyer and J. H. Patel",
title = "Performance evaluation of multiple register sets",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "264--271",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stanley:1987:PAA,
author = "T. J. Stanley and R. G. Wedig",
title = "A performance analysis of automatically managed top of
stack buffers",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "272--281",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moore:1987:CSV,
author = "B. Moore and A. Padegs and R. Smith and W. Buchholz",
title = "Concepts of the {System\slash 370} vector
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "282--288",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pleszkun:1987:WRA,
author = "A. R. Pleszkun and J. R. Goodman and W. C. Hsu and R.
T. Joersz and G. Bier and P. Woest and P. B.
Schechter",
title = "{WISQ}: a restartable architecture using queues",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "290--299",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chow:1987:ATD,
author = "P. Chow and M. Horowitz",
title = "Architectural tradeoffs in the design of {MIPS-X}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "300--308",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ditzel:1987:HAC,
author = "D. R. Ditzel and H. R. McLellan and A. D. Berenbaum",
title = "The hardware architecture of the {CRISP}
microprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "2",
pages = "309--319",
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 16:49:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moore:1987:BDN,
author = "Matthew Moore and Charles McDowell",
title = "Bi-directional networks for large parallel
processors",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "3",
pages = "3--4",
month = jun,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kaplan:1987:LLG,
author = "Ian Kaplan",
title = "The {LDF 100}: a large grain dataflow parallel
processor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "3",
pages = "5--12",
month = jun,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lass:1987:WCC,
author = "Stanley Lass",
title = "Wide channel computers",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "3",
pages = "13--16",
month = jun,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bril:1987:IIA,
author = "Reinder J. Bril",
title = "An implementation independent approach to cache
memories",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "3",
pages = "17--24",
month = jun,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bril:1987:CLV,
author = "Reinder J. Bril",
title = "On cacheability of lock-variables in tightly coupled
multiprocessor systems",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "3",
pages = "25--32",
month = jun,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:53 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Iliffe:1987:FLM,
author = "J. K. Iliffe",
title = "A forward-looking method of {Cache} memory control",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "4--10",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bandyopadhyay:1987:CBM,
author = "Amitava Bandyopadhyay and Yuan F. Zheng",
title = "Combining both microcode and hardwired control in
{RISC}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "11--15",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dowd:1987:ERV,
author = "Martin Dowd",
title = "An example {RISC} vector machine architecture",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "16--22",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhatia:1987:MIN,
author = "Sanjiv K. Bhatia and A. G. Starling",
title = "Multilayered {Illiac} network scheme",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "23--31",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nowak:1987:SGP,
author = "Lothar Nowak",
title = "{SAMP}: a general purpose processor based on a
self-timed {VLIW} structure",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "32--39",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ashenden:1987:LWP,
author = "Peter J. Ashenden and Chris J. Barter and Chris D.
Marlin",
title = "The {Leopard} workstation project",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "40--51",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chiang:1987:DEL,
author = "Y. P. Chiang and M. L. Manwaring",
title = "Direct execution {Lisp} and cell memory",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "52--57",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Terry:1987:FCM,
author = "J. M. Terry",
title = "Flow-control machines: the structured execution
architecture {(SXA)}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "4",
pages = "58--69",
month = sep,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wirth:1987:HAP,
author = "Niklaus Wirth",
title = "Hardware architectures for programming languages and
programming languages for hardware architectures",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "2--8",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Beck:1987:VAM,
author = "Bob Beck and Bob Kasten and Shreekant Thakkar",
title = "{VLSI} assist for a multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "10--20",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bisiani:1987:ASM,
author = "Roberto Bisiani and Alessandro Forin",
title = "Architectural support for multilanguage parallel
programming on heterogeneous systems",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "21--30",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rashid:1987:MIV,
author = "Richard Rashid and Avadis Tevanian and Michael Young
and David Golub and Robert Baron",
title = "Machine-independent virtual memory management for
paged uniprocessor and multiprocessor architectures",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "31--39",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hayes:1987:ADE,
author = "John R. Hayes and Martin E. Fraeman and Robert L.
Williams and Thomas Zaremba",
title = "An architecture for the direct execution of the
{Forth} programming language",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "42--49",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steenkiste:1987:TTC,
author = "Peter Steenkiste and John Hennessy",
title = "Tags and type checking in {LISP}: hardware and
software approaches",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "50--59",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Davidson:1987:EIS,
author = "Jack W. Davidson and Richard A. Vaughan",
title = "The effect of instruction set complexity on program
size and memory performance",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "60--64",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Atkinson:1987:DP,
author = "Russell R. Atkinson and Edward M. McCreight",
title = "The {Dragon} processor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "65--69",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodman:1987:CMV,
author = "James R. Goodman",
title = "Coherency for multiprocessor virtual address caches",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "72--81",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cargill:1987:CHS,
author = "T. A. Cargill and B. N. Locanthi",
title = "Cheap hardware support for software debugging and
profiling",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "82--83",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Georgiou:1987:ECI,
author = "C. J. Georgiou and S. L. Palmer and P. L. Rosenfeld",
title = "An experimental coprocessor for implementing
persistent objects on an {IBM 4381}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "84--87",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Magenheimer:1987:IMD,
author = "Daniel J. Magenheimer and Liz Peters and Karl Pettis
and Dan Zuras",
title = "Integer multiplication and division on the
{HP Precision Architecture}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "90--99",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wall:1987:MEU,
author = "David W. Wall and Michael L. Powell",
title = "The {Mahler} experience: using an intermediate
language as the machine description",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "100--104",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weiss:1987:SSC,
author = "Shlomo Weiss and James E. Smith",
title = "A study of scalar compilation techniques for pipelined
supercomputers",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "105--109",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bush:1987:CSR,
author = "William R. Bush and A. Dain Samples and David Ungar
and Paul N. Hilfinger",
title = "Compiling {Smalltalk-80} to a {RISC}",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "112--116",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chow:1987:HMA,
author = "F. Chow and S. Correll and M. Himelstein and E.
Killian and L. Weber",
title = "How many addressing modes are enough?",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "117--121",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Massalin:1987:SLS,
author = "Henry Massalin",
title = "{Superoptimizer}: a look at the smallest program",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "122--126",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Taki:1987:PAE,
author = "Kazuo Taki and Katsuto Nakajima and Hiroshi Nakashima
and Morihiro Ikeda",
title = "Performance and architectural evaluation of the {PSI}
machine",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "128--135",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Borriello:1987:RVC,
author = "Gaetano Borriello and Andrew R. Cherenson and Peter B.
Danzig and Michael N. Nelson",
title = "{RISCs} vs.\ {CISCs} for {Prolog}: a case study",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "136--145",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kieburtz:1987:RAS,
author = "Richard B. Kieburtz",
title = "A {RISC} architecture for symbolic computation",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "146--155",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ditzel:1987:DTS,
author = "David R. Ditzel and Hubert R. McLellan and Alan D.
Berenbaum",
title = "Design tradeoffs to support the {C} programming
language in the {CRISP} microprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "158--163",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thacker:1987:FMW,
author = "Charles P. Thacker and Lawrence C. Stewart",
title = "{Firefly}: a multiprocessor workstation",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "164--172",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Clark:1987:PPV,
author = "Douglas W. Clark",
title = "Pipelining and performance in the {VAX 8800}
processor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "173--177",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Colwell:1987:VAT,
author = "Robert P. Colwell and Robert P. Nix and John J.
O'Donnell and David B. Papworth and Paul K. Rodman",
title = "A {VLIW} architecture for a trace scheduling
compiler",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "180--192",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Levinthal:1987:PCG,
author = "Adam Levinthal and Pat Hanrahan and Mike Paquette and
Jim Lawson",
title = "Parallel computers for graphics applications",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "193--198",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1987:ZCP,
author = "J. E. Smith and G. E. Dermer and B. D. Vanderwarn and
S. D. Klinger and C. M. Rozewski",
title = "The {ZS-1} central processor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "5",
pages = "199--204",
month = oct,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:25 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Frietman:1987:EOD,
author = "E. E. E. Frietman and A. B. Ruighaver",
title = "An electro-optic data communication system for the
{Delft} parallel processor",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "6",
pages = "2--8",
month = dec,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shippen:1987:TTD,
author = "G. B. Shippen and J. K. Archibald",
title = "A tagged token dataflow machine for computing small,
iterative algorithms",
journal = j-COMP-ARCH-NEWS,
volume = "15",
number = "6",
pages = "9--18",
month = dec,
year = "1987",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:28 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Penn:1988:PSI,
author = "Clif Penn",
title = "Preface to the {Special} issue on {Neural Networks}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "6--6",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lippmann:1988:ICN,
author = "Richard P. Lippmann",
title = "An introduction to computing with neural nets",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "7--25",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anderson:1988:SNN,
author = "James A. Anderson and Edward J. Wisniewski and Susan
R. Viscuso",
title = "Software for neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "26--36",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Garth:1988:ISN,
author = "Simon Garth and Danny Pike",
title = "An integrated system for neural network simulations",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "37--44",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Maren:1988:CRI,
author = "A. Jean Maren",
title = "Conference report: {IEEE First International
Conference on Neural Networks}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "45--46",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dongarra:1988:PVC,
author = "Jack J. Dongarra",
title = "Performance of various computers using standard linear
equations software in a {FORTRAN} environment",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "47--69",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wulf:1988:WCA,
author = "Wm. A. Wulf",
title = "The {WM} computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "70--84",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tabak:1988:LIM,
author = "Daniel Tabak",
title = "Logarithmic indices for multiprocessor evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "85--90",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dowd:1988:ERV,
author = "Martin Dowd",
title = "An example {RISC} vector machine architecture",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "91--99",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dowd:1988:RVC,
author = "Martin Dowd",
title = "{RISC} vector {CPU}'s and crossbars in desktops",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "100--102",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lass:1988:MIO,
author = "Stanley Lass",
title = "Multiple instructions\slash operands per access to
cache memory",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "103--103",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gass:1988:WRS,
author = "Wanda Gass",
title = "Workshop report: synthesis of foo bars",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "104--108",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ferguson:1988:BRL,
author = "F. Joel Ferguson",
title = "Book Review: {{\em Logic Design Principles\/}} by
{Edward J. McCluskey, Prentice-Hall Publishers,
Englewood Cliffs, New Jersey, 549 pp., \$39.95}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "1",
pages = "109--109",
month = mar,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:31 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ghosh:1988:CIM,
author = "J. Ghosh and K. Hwang",
title = "Critical issues in mapping neural networks on
message-passing multicomputers",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "3--11",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Takefuji:1988:MCS,
author = "Y. Takefuji and R. Jannarone and Y. B. Cho and T.
Chen",
title = "Multinomial conjunctoid statistical learning
machines",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "12--17",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Louri:1988:BPA,
author = "A. Louri and K. Hwang",
title = "A bit-plane architecture for optical computing with
two-dimensional symbolic substitution",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "18--27",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fiske:1988:RAP,
author = "S. Fiske and W. J. Dally",
title = "The reconfigurable arithmetic processor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "30--36",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pleszkun:1988:PPM,
author = "A. R. Pleszkun and G. S. Sohi",
title = "The performance potential of multiple functional unit
processors",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "37--44",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hwu:1988:EPM,
author = "W. W. Hwu and P. P. Chang",
title = "Exploiting parallel microprocessor microarchitectures
with a compiler code generator",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "45--53",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McNiven:1988:AMR,
author = "G. D. McNiven and E. S. Davidson",
title = "Analysis of memory referencing behavior for design of
local memories",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "56--63",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eickenmeyer:1988:PEC,
author = "R. J. Eickemeyer and J. H. Patel",
title = "Performance evaluation of on-chip register and cache
organizations",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "64--72",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Baer:1988:IPM,
author = "J.-L. Baer and W.-H. Wang",
title = "On the inclusion properties for multi-level cache
hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "73--80",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Short:1988:SST,
author = "R. T. Short and H. M. Levy",
title = "A simulation study of two-level caches",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "81--88",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chow:1988:HNH,
author = "E. Chow and H. Madan and J. Peterson and D. Grunwald
and D. Reed",
title = "Hyperswitch network for the hypercube computer",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "90--99",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Winsor:1988:ABH,
author = "D. C. Winsor and T. N. Mudge",
title = "Analysis of bus hierarchies for multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "100--107",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wei:1988:EGN,
author = "S. Wei and G. Lee",
title = "Extra group network: a cost-effective fault-tolerant
multistage interconnection network",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "108--115",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jiang:1988:PMB,
author = "H. Jiang and K. C. Smith",
title = "A partial-multiple-bus computer structure with
improved cost effectiveness",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "116--122",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Watson:1988:FPA,
author = "I. Watson and V. Woods and P. Watson and R. Banach and
M. Greenberg and J. Sargeant",
title = "{Flagship}: a parallel architecture for declarative
programming",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "124--130",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Iannucci:1988:TDN,
author = "R. A. Iannucci",
title = "Toward a dataflow\slash {von Neumann} hybrid
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "131--140",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Culler:1988:RRD,
author = "D. E. Culler and Arvind",
title = "Resource requirements of dataflow programs",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "141--150",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sprunt:1988:PDP,
author = "B. Sprunt and D. Kirk and L. Sha",
title = "Priority-driven, preemptive {I/O} controllers for
real-time systems",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "152--159",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shukla:1988:KIP,
author = "S. B. Shukla and D. P. Agrawal",
title = "A kernel-independent, pipelined architecture for
real-time {$2$-D} convolution",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "160--166",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:1988:EBL,
author = "W. Liu and T.-F. Yeh and W. E. Batchelor and R.
Cavin",
title = "Exploiting bit level concurrency in real-time
geometric feature extractions",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "167--174",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Clark:1988:MVP,
author = "D. W. Clark and P. J. Bannon and J. B. Keller",
title = "Measuring {VAX 8800} performance with a histogram
hardware monitor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "176--185",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sites:1988:MCA,
author = "R. L. Sites and A. Agarwal",
title = "Multiprocessor cache analysis using {ATUM}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "186--195",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ng:1988:TOB,
author = "S. Ng and D. Lang and R. Selinger",
title = "Trade-offs between devices and paths in achieving disk
interleaving",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "196--201",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jainandunsing:1988:DCC,
author = "K. Jainandunsing and E. F. Deprettere",
title = "Design of a concurrent computer for solving systems of
linear equations",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "204--211",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wolfe:1988:WDH,
author = "A. Wolfe and M. {Breternitz, Jr.} and C. Stephens and
A. L. Ting and D. B. Kirk and R. P. {Bianchini, Jr.}
and J. P. Shen",
title = "The white dwarf: a high-performance
application-specific processor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "212--222",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gaudiot:1988:SPD,
author = "J. L. Gaudiot and C. M. Lin and M. Hosseiniyar",
title = "Solving partial differential equations in a
data-driven multiprocessor environment",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "223--230",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1988:SSP,
author = "D. Lee",
title = "Scrambled storage for parallel memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "232--239",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Krishnaswamy:1988:ALC,
author = "V. Krishnaswamy and S. Ahuja and N. Carriero and D.
Gelernter",
title = "The architecture of a {Linda} coprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "240--249",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kung:1988:DAS,
author = "H. T. Kung",
title = "Deadlock avoidance for systolic communication",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "252--260",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{So:1988:CPV,
author = "K. So and V. Zecca",
title = "Cache performance of vector processors",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "261--268",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vernon:1988:DRR,
author = "M. K. Vernon and U. Manber",
title = "Distributed round-robin and first-come first-serve
protocols and their applications to multiprocessor bus
arbitration",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "269--279",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:1988:EDS,
author = "A. Agarwal and R. Simoni and J. Hennessy and M.
Horowitz",
title = "An evaluation of directory schemes for cache
coherence",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "280--289",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Prybylski:1988:PTC,
author = "S. Przybylski and M. Horowitz and J. Hennessy",
title = "Performance tradeoffs in cache design",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "290--298",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheong:1988:CCS,
author = "H. Cheong and A. V. Veidenbaum",
title = "A cache coherence scheme with fast selective
invalidation",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "299--307",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vernon:1988:AEP,
author = "M. K. Vernon and E. D. Lazowska and J. Zahorjan",
title = "An accurate and efficient performance analysis
technique for multiprocessor snooping cache-consistency
protocols",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "308--315",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rau:1988:DTR,
author = "D. Rau and J. A. B. Fortes and H. J. Siegel",
title = "Destination tag routing techniques based on a state
model for the {LADM} network",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "318--324",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:1988:RCB,
author = "D. W. Kim and G. J. Lipovski and A. Hartmann and R.
Jenevein",
title = "Regular {CC}-banyan networks",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "325--332",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jenevein:1988:TAR,
author = "R. M. Jenevein and T. Mookken",
title = "Traffic analysis of rectangular {SW}-banyan networks",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "333--342",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tamir:1988:HPM,
author = "Y. Tamir and G. L. Frazier",
title = "High-performance multi-queue buffers for {VLSI}
communications switches",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "343--354",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Preiss:1988:CBM,
author = "B. R. Preiss and V. C. Hamacher",
title = "A cache-based message passing scheme for a shared-bus
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "358--364",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Boku:1988:IHP,
author = "T. Boku and S. Nomura and H. Amano",
title = "{IMPULSE}: a high performance processing unit for
multiprocessors for scientific calculation",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "365--372",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eggers:1988:CSP,
author = "S. J. Eggers and R. H. Katz",
title = "A characterization of sharing in parallel programs and
its application to coherency protocol evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "373--382",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lipovski:1988:FOI,
author = "G. J. Lipovski and P. Vaughan",
title = "A fetch-and-op implementation for parallel computers",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "384--392",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seznec:1988:SPT,
author = "A. Seznec and Y. J{\'e}gou",
title = "Synchronizing processors through memory requests in a
tightly coupled multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "393--400",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fujimoto:1988:DPS,
author = "R. M. Fujimoto and J.-J. Tsai and G. Gopalakrishnan",
title = "Design and performance of special purpose hardware for
time warp",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "401--409",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheriton:1988:VMI,
author = "D. R. Cheriton and A. Gupta and P. D. Boyle and H. A.
Goosen",
title = "The {VMP} multiprocessor: initial experience,
refinements, and performance evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "410--421",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodman:1988:WMN,
author = "J. R. Goodman and P. J. Woest",
title = "The {Wisconsin} multicube: a new large-scale
cache-coherent multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "422--431",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tick:1988:DBP,
author = "E. Tick",
title = "Data buffer performance for sequential {Prolog}
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "434--442",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Halstead:1988:MMP,
author = "R. H. {Halstead, Jr.} and T. Fujita",
title = "{MASA}: a multithreaded processor architecture for
parallel symbolic computing",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "443--451",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Butler:1988:PAO,
author = "P. L. Butler and J. D. {Allen, Jr.} and D. W.
Bouldin",
title = "Parallel architecture for {OPS5}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "2",
pages = "452--457",
month = may,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheriton:1988:CCM,
author = "David R. Cheriton and Pat Boyle and Gert A.
Slavenburg",
title = "Comments on {``Coherency for multiprocessor virtual
addresses caches''} by {James R. Goodman}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "3--6",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodman:1988:RDR,
author = "James R. Goodman",
title = "Reply to {David R. Cheriton's, Pat Boyle's, and Gert
A. Slavenburg's ``Comments on `Coherency for
multiprocessor virtual addressed caches'\,'' by James
R. Goodman}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "7--7",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rabbat:1988:TDC,
author = "Guy Rabbat and Borko Furht and Ron Kibler",
title = "Three-dimensional computers and measuring their
performance",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "9--16",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Castan:1988:MPG,
author = "M. Castan and A. Contessa and E. Cousin and C. Coustet
and B. Lecussan",
title = "{MaRS}: a parallel graph reduction multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "17--24",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Contessa:1988:AFT,
author = "Alessandro Contessa",
title = "An approach to fault tolerance and error recovery in a
parallel graph reduction machine: {MaRS}---a case
study",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "25--32",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Crawford:1988:EHH,
author = "Chuck Crawford",
title = "Evolution of the {Harris H-series} computers and
speculations on their future",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "33--39",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Good:1988:SIC,
author = "Philip L. Good",
title = "Structuring an instruction cache",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "40--43",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:1988:CMM,
author = "Eric E. Johnson",
title = "Completing an {MIMD} multiprocessor taxonomy",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "44--47",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jones:1988:UR,
author = "Douglas W. Jones",
title = "The ultimate {RISC}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "48--55",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jones:1988:MC,
author = "Douglas W. Jones",
title = "A minimal {CISC}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "56--63",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lass:1988:SCM,
author = "Stanley Lass",
title = "Shared cache multiprocessing with pack computers",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "64--70",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:1988:SVS,
author = "Norman P. Jouppi",
title = "Superscalar vs. superpipelined machines",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "71--80",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schachter:1988:BRH,
author = "Lorne H. Schachter",
title = "Book review of {{\em High-Performance Computer
Architecture\/}} by {Harold S. Stone. Addison-Wesley
1987}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "3",
pages = "81--84",
month = jun,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:55 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ramachandran:1988:PSI,
author = "Umakishore Ramachandran",
title = "Preface to the {Special Issue on Architectural Support
for Operating Systems}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "11--11",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Asthana:1988:IMS,
author = "A. Asthana and H. V. Jagadish and J. A. Chandross and
D. Lin and S. C. Knauer",
title = "An intelligent memory system",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "12--20",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Beltrametti:1988:CMM,
author = "Monica Beltrametti and Kenneth Bobey and John R.
Zorbas",
title = "The control mechanism for the {Myrias} parallel
computer system",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "21--30",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Finkel:1988:YSM,
author = "Raphael Finkel and Debra Hensgen",
title = "{YACKOS} on a shared-memory multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "31--36",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pucci:1988:OCE,
author = "Marc F. Pucci and J. L. Alberi",
title = "Optimized communication in an extended remote
procedure call model",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "37--46",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cortadella:1988:DRC,
author = "Jordi Cortadella and Teodor Jov{\'e}",
title = "Dynamic {RAM} for on-chip instruction caches",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "45--50",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Naderi:1988:MPEa,
author = "M. Naderi",
title = "Modelling and performance evaluation of
multiprocessors organization with shared memories",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "51--74",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gehringer:1988:SCP,
author = "Edward Gehringer and Janne Abullarade and Michael H.
Gulyn",
title = "A survey of commercial parallel processors",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "75--107",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lease:1988:CPS,
author = "Mark Lease and Mac Lively",
title = "Comparing production system architectures",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "108--116",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Page:1988:FAH,
author = "Ivor Page and Jeff Niehaus",
title = "The {Flex} architecture, a high speed graphics
processor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "117--129",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Murakami:1988:OKU,
author = "Kazuaki Murakami and Akira Fukuda and Toshinori
Sueyoshi and Shinji Tomita",
title = "An overview of the {Kyushu University} reconfigurable
parallel processor",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "130--137",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Percus:1988:SRC,
author = "Ora E. Percus and J. K. Percus",
title = "Some results concerning clock-regulated queues",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "138--144",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Williams:1988:SSS,
author = "Fleur Liane Williams",
title = "Should {SCC} set condition codes?",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "145--149",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steven:1988:NEA,
author = "Gordon B. Steven",
title = "A novel effective address calculation mechanism for
{RISC} microprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "150--156",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parhami:1988:DFV,
author = "Behrooz Parhami",
title = "From defects to failures: a view of dependable
computing",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "157--168",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Patterson:1988:RP,
author = "David A. Patterson",
title = "{RISCY} patents",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "169--191",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Takacs:1988:BRV,
author = "Helen C. Takacs",
title = "Book review: {{\em A VLSI Architecture for Concurrent
Data Structures\/}} by {William J. Dally (Kluwer
1988)}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "192--193",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Colwell:1988:BRC,
author = "Robert P. Colwell",
title = "Book review: {{\em Computer Architecture and
Organization}}, 2nd ed. by {John P. Hayes (McGraw Hill,
1988)}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "193--195",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McDowell:1988:BRS,
author = "Charles E. McDowell",
title = "Book review: {{\em Supercomputer Architectures\/}} by
{Paul B. Schneck (Kluwer Academic Publishers)}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "4",
pages = "195--196",
month = sep,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:11 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hum:1988:SWF,
author = "Herbert H. J. Hum and Guang R. Gao",
title = "Summary of the workshop on frontiers in functional
programming and dataflow architecture",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "12--19",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{vanTilborg:1988:IDC,
author = "Andre M. van Tilborg",
title = "Instrumentation for distributed computing systems",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "20--25",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Griffin:1988:UUR,
author = "Glenn W. Griffin",
title = "The ultimate ultimate {RISC}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "26--32",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jones:1988:RCR,
author = "Douglas W. Jones",
title = "Risks of comparing {RISCs}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "33--34",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Naderi:1988:MPEb,
author = "M. Naderi",
title = "Modelling and performance evaluation of
multiprocessors, organizations with multi-memory
units",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "35--51",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kogge:1988:VRB,
author = "Peter Kogge and John Oldfield and Mark Brule and
Charles Stormon",
title = "{VLSI} and rule-based systems",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "52--65",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parhami:1988:BRM,
author = "Behrooz Parhami",
title = "Book review: {{\em Memory Storage Patterns in Parallel
Processing\/}} by {Mary A. Mace (Kluwer Academic
Publishers, Boston, 1987, 139 pp.)}",
journal = j-COMP-ARCH-NEWS,
volume = "16",
number = "5",
pages = "76--76",
month = dec,
year = "1988",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moskowitz:1989:AMM,
author = "J. P. Moskowitz and C. Jousselin",
title = "An algebraic memory model",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "1",
pages = "55--62",
month = mar,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wong:1989:SAS,
author = "W. F. Wong",
title = "A stack addressing scheme based on windowing",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "1",
pages = "63--69",
month = mar,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:1989:PTD,
author = "Anonymous",
title = "Pipelining through {Dynamic Control ROM}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "1",
pages = "70--72",
month = mar,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lass:1989:SIC,
author = "Stanley E. Lass",
title = "Some innovations in computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "1",
pages = "73--77",
month = mar,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bitar:1989:BRR,
author = "Philip Bitar",
title = "Book reviews: Review of {{\em Parallel Execution of
Logic Programs\/}} by {John Conery. Kluwer Academic
Publishers 1987}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "1",
pages = "81--82",
month = mar,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cohn:1989:ACT,
author = "Robert Cohn and Thomas Gross and Monica Lam",
title = "Architecture and compiler tradeoffs for a long
instruction word processor",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "2--14",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sohi:1989:TIF,
author = "Gurindar S. Sohi and Sriram Vajapeyam",
title = "Tradeoffs in instruction format design for horizontal
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "15--25",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dehnert:1989:OLS,
author = "James C. Dehnert and Peter Y.-T. Hsu and Joseph P.
Bratt",
title = "Overlapped loop support in the {Cydra 5}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "26--38",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burkowski:1989:ASS,
author = "F. J. Burkowski and G. V. Cormack and G. D. P. Dueck",
title = "Architectural support for synchronous task
communication",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "40--53",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gupta:1989:FBM,
author = "Rajiv Gupta",
title = "The fuzzy barrier: a mechanism for high speed
synchronization of processors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "54--63",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodman:1989:ESP,
author = "James R. Goodman and Mary K. Vernon and Philip J.
Woest",
title = "Efficient synchronization primitives for large-scale
cache-coherent multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "64--75",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mellor-Crummey:1989:SIC,
author = "J. M. Mellor-Crummey and T. J. LeBlanc",
title = "A software instruction counter",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "78--86",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Aral:1989:EDP,
author = "Z. Aral and I. Gertner and G. Schaffer",
title = "Efficient debugging primitives for multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "87--95",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Staknis:1989:SMA,
author = "M. E. Staknis",
title = "Sheaved memory: architectural support for state saving
and restoration in paged systems",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "96--102",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Holliday:1989:RHP,
author = "M. A. Holliday",
title = "Reference history, page size, and migration daemons in
local\slash remote architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "104--112",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Black:1989:TLB,
author = "D. L. Black and R. F. Rashid and D. B. Golub and C. R.
Hill",
title = "Translation lookaside buffer consistency: a software
approach",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "113--122",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gibson:1989:FCT,
author = "G. A. Gibson and L. Hellerstein and R. M. Karp and D.
A. Patterson",
title = "Failure correction techniques for large disk arrays",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "123--132",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:1989:UVS,
author = "N. P. Jouppi and J. Bertoni and D. W. Wall",
title = "A unified vector\slash scalar floating-point
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "134--143",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mulder:1989:DBR,
author = "H. Mulder",
title = "Data buffering: run-time versus compile-time support",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "144--151",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Adams:1989:AIS,
author = "T. L. Adams and R. E. Zimmerman",
title = "An analysis of 8086 instruction set usage in {MS DOS}
programs",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "152--160",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Roos:1989:RTS,
author = "J. Roos",
title = "A real-time support processor for {Ada} tasking",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "162--171",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vegdahl:1989:RES,
author = "Steven R. Vegdahl and Uwe F. Pleban",
title = "The runtime environment for {Screme}, a {Scheme}
implementation on the 88000",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "172--182",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McFarling:1989:POI,
author = "S. McFarling",
title = "Program optimization for instruction caches",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "183--191",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Karger:1989:URO,
author = "Paul A. Karger",
title = "Using registers to optimize cross-domain call
performance",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "194--204",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Arnould:1989:DNN,
author = "Emmanuel Arnould and H. T. Kung and Fran{\c{c}}ois
Bitz and Robert D. Sansom and Eric C. Cooper",
title = "The design of {Nectar}: a network backplane for
heterogeneous multicomputers",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "205--216",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Delgado-Rannauro:1989:MDP,
author = "S. A. Delgado-Rannauro and T. J. Reynolds",
title = "A message driven {OR}-parallel machine",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "217--228",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Owicki:1989:EPS,
author = "S. Owicki and A. Agarwal",
title = "Evaluating the performance of software cache
coherence",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "230--242",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weber:1989:ACI,
author = "W.-D. Weber and A. Gupta",
title = "Analysis of cache invalidation patterns in
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "243--256",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eggers:1989:ESC,
author = "S. J. Eggers and R. H. Katz",
title = "The effect of sharing on the cache and bus performance
of parallel programs",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "257--270",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:1989:AIL,
author = "N. P. Jouppi and D. W. Wall",
title = "Available instruction-level parallelism for
superscalar and superpipelined machines",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "272--282",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dally:1989:MOF,
author = "W. J. Dally",
title = "Micro-optimization of floating-point operations",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "283--289",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1989:LMI,
author = "M. D. Smith and M. Johnson and M. A. Horowitz",
title = "Limits on multiple instruction issue",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "2",
pages = "290--302",
month = apr,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:39 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eggers:1989:EPF,
author = "S. J. Eggers and R. H. Katz",
title = "Evaluating the performance of four snooping cache
coherency protocols",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "2--15",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheriton:1989:MLS,
author = "D. R. Cheriton and H. A. Goosen and P. D. Boyle",
title = "Multi-level shared caching techniques for scalability
in {VMP-M/C}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "16--24",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goto:1989:DPC,
author = "A. Goto and A. Matsumoto and E. Tick",
title = "Design and performance of a coherent cache for
parallel logic programming architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "25--33",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Grafe:1989:EDP,
author = "V. G. Grafe and G. S. Davidson and J. E. Hoch and V.
P. Holmes",
title = "The {Epsilon} dataflow processor",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "36--45",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sakai:1989:ADS,
author = "S. Sakai and Y. Yamaguchi and K. Hiraki and Y. Kodama
and T. Yuba",
title = "An architecture of a dataflow single chip processor",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "46--53",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nitezki:1989:EDP,
author = "P. Nitezki",
title = "Exploiting data parallelism in signal processing on a
dataflow machine",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "54--61",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ibbett:1989:AMS,
author = "R. N. Ibbett and T. M. Hopkins and K. I. M. McKinnon",
title = "Architectural mechanisms to support sparse vector
processing",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "64--71",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Harper:1989:DSS,
author = "D. T. Harper and D. A. Linebarger",
title = "A dynamic storage scheme for conflict-free vector
access",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "72--77",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Murakami:1989:SSI,
author = "K. Murakami and N. Irie and S. Tomita",
title = "{SIMP} ({Single Instruction} stream\slash {Multiple}
instruction {Pipelining}): a novel high-speed
single-processor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "78--85",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ben-Asher:1989:DSA,
author = "Y. Ben-Asher and D. Egozi and A. Schuster",
title = "{$2$-D SIMD} algorithms in the perfect shuffle
networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "88--95",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Valero-Garcia:1989:SHA,
author = "M. Valero-Garcia and J. J. Navarro and J. M. Llaberia
and M. Valero",
title = "Systematic hardware adaptation of systolic
algorithms",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "96--104",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1989:TMH,
author = "M.-S. Chen and K. G. Shin",
title = "Task migration in hypercube multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "105--111",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Przybylski:1989:CPO,
author = "S. Przybylski and M. Horowitz and J. Hennessy",
title = "Characteristics of performance-optimal multi-level
cache hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "114--121",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wood:1989:SRD,
author = "D. A. Wood and R. H. Katz",
title = "Supporting reference and dirty bits in {SPUR}'s
virtual address cache",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "122--130",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kessler:1989:IIS,
author = "R. E. Kessler and R. Jooss and A. Lebeck and M. D.
Hill",
title = "Inexpensive implementations of set-associativity",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "131--139",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:1989:OPT,
author = "W. H. Wang and J.-L. Baer and H. M. Levy",
title = "Organization and performance of a two-level
virtual-real cache hierarchy",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "140--148",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jesshope:1989:HPC,
author = "C. R. Jesshope and P. R. Miller and J. T. Yantchev",
title = "High performance communications in processor
networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "150--157",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mizrahi:1989:IMS,
author = "H. E. Mizrahi and J.-L. Baer and E. D. Lazowska and J.
Zahorjan",
title = "Introducing memory into the switch elements of
multiprocessor interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "158--166",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Scott:1989:UFC,
author = "S. L. Scott and G. S. Sohi",
title = "Using feedback to control tree saturation in
multistage interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "167--176",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ezhilchelvan:1989:CRS,
author = "P. D. Ezhilchelvan and S. K. Shrivastava and A.
Tully",
title = "Constructing replicated systems using processors with
point-to-point communication links",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "177--184",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Benker:1989:KKC,
author = "H. Benker and J. M. Beacco and M. Dorochevsky and Th.
Jeffr{\'e} and A. P{\"o}hlmann and J. Noy{\'e} and B.
Poterie and J. C. Syre and O. Thibault and G.
Watzlawik",
title = "{KCM}: a knowledge crunching machine",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "186--194",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singhal:1989:HPP,
author = "A. Singhal and Y. N. Patt",
title = "A high performance {Prolog} processor with multiple
function units",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "195--202",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Morioka:1989:EMS,
author = "M. Morioka and S. Yamaguchi and T. Bandoh",
title = "Evaluation of memory system for integrated {Prolog}
processor {IPP}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "203--210",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wong:1989:TDH,
author = "K.-F. Wong and M. H. Williams",
title = "A type driven hardware engine for {Prolog} clause
retrieval over a large knowledge base",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "211--222",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hwu:1989:CSH,
author = "W. W. Hwu and T. M. Conte and P. P. Chang",
title = "Comparing software and hardware schemes for reducing
the cost of branches",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "224--233",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farrens:1989:IPS,
author = "M. K. Farrens and A. R. Pleszkun",
title = "Improving performance of small on-chip instruction
caches",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "234--241",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hwu:1989:AHI,
author = "W. W. Hwu and P. P. Chang",
title = "Achieving high instruction cache performance with an
optimizing compiler",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "242--251",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steenkiste:1989:ICD,
author = "P. Steenkiste",
title = "The impact of code density on instruction cache
performance",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "252--259",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nikhil:1989:CDS,
author = "R. S. Nikhil",
title = "Can dataflow subsume {von Neumann} computing?",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "262--272",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weber:1989:EBM,
author = "W.-D. Weber and A. Gupta",
title = "Exploring the benefits of multiple hardware contexts
in a multiprocessor architecture: preliminary results",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "273--280",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:1989:AOT,
author = "N. P. Jouppi",
title = "Architectural and organizational tradeoffs in the
design of the {MultiTitan CPU}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "281--289",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sato:1989:RTC,
author = "M. Sato and S. Ichikawa and E. Goto",
title = "Run-time checking in {Lisp} by integrating memory
addressing and range checking",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "290--297",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hopper:1989:MVW,
author = "A. Hopper and A. Jones and D. Lioupis",
title = "Multiple vs.\ wide shared bus multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "300--306",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Annaratone:1989:PMC,
author = "M. Annaratone and R. R{\"u}hl",
title = "Performance measurements on a commercial
multiprocessor running parallel code",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "307--314",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Annaratone:1989:ICS,
author = "M. Annaratone and C. Pommerell and R. R{\"u}hl",
title = "Interprocessor communication speed and performance in
distributed-memory parallel processors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "315--324",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ghosal:1989:ACC,
author = "D. S. Ghosal and S. K. Tripathi and L. N. Bhuyan and
H. Jiang",
title = "Analysis of computation-communication issues in
dynamic dataflow architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "325--333",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kravitz:1989:LSM,
author = "S. Kravitz and R. E. Bryant and R. Rutenbar",
title = "Logic simulation on massively parallel architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "336--343",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fukazawa:1989:RRP,
author = "T. Fukazawa and T. Kimura and M. Tomizawa and K.
Takeda and Y. Itoh",
title = "{R256}: a research parallel processor for scientific
computation",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "344--351",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anido:1989:TPT,
author = "M. L. Anido and D. J. Allerton and E. J. Zaluska",
title = "A three-port\slash three-access register file for
concurrent processing and {I/O} communication in a
{RISC}-like graphics engine",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "354--361",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mulder:1989:AFA,
author = "J. M. Mulder and R. J. Portier and A. Srivastava and
R. in't Velt",
title = "An architecture framework for application-specific and
scalable architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "362--369",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:1989:PLS,
author = "K. Kim and V. K. Prasanna-Kumar",
title = "Perfect {Latin} squares and parallel array access",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "372--379",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weiss:1989:ASS,
author = "S. Weiss",
title = "An aperiodic storage scheme to reduce memory conflicts
in vector processors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "380--386",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1989:AVA,
author = "C.-L. Chen and C.-K. Liao",
title = "Analysis of vector access performance on skewed
interleaved memory",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "387--394",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:1989:ABS,
author = "A. Agarwal and M. Cherian",
title = "Adaptive backoff synchronization techniques",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "396--406",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stenstrom:1989:CCP,
author = "P. Stenstr{\"o}m",
title = "A cache consistency protocol for multiprocessors with
multistage networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "407--415",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Su:1989:DSM,
author = "H.-M. Su and P.-C. Yew",
title = "On data synchronization for multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "3",
pages = "416--423",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{vanTilborg:1989:PFD,
author = "A. M. van Tilborg",
title = "Panel on future directions in parallel computer
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "3--53",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gunther:1989:PBS,
author = "N. J. Gunther and M. T. Noga",
title = "{ParcBench}: a benchmark for shared-memory
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "54--61",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Elkateeb:1989:PSR,
author = "A. Elkateeb and T. Le-Ngoc",
title = "A priority strategy on {RISC} for real-time
multitasking software applications",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "62--68",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oyang:1989:MCA,
author = "Y.-J. Oyang",
title = "A multiprocessor configuration in accordance with the
aspects of physical and systems design",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "69--73",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seebauer:1989:MCEa,
author = "H. Seebauer",
title = "A memory controller executing segment operations in
time {$ O(1) $}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "74--81",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schwartz:1989:DDD,
author = "R. J. Schwartz",
title = "The design and development of a dynamic program
behavior measurement tool for the {Intel 8086\slash
88}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "82--94",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martin:1989:FAM,
author = "A. J. Martin and S. M. Burns and T. K. Lee and D.
Borkovic and P. J. Hazewindus",
title = "The first asynchronous microprocessor: the test
results",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "95--110",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cornett:1989:UMS,
author = "F. Cornett",
title = "The {UT1000} microprogramming simulator: an
educational tool",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "111--118",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuen:1989:BDD,
author = "C. K. Yuen and W. F. Wong",
title = "A bidirectional data driven {Lisp} engine for the
direct execution of {Lisp} in parallel",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "4",
pages = "119--130",
month = jun,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smotherman:1989:SBT,
author = "M. Smotherman",
title = "A sequencing-based taxonomy of {I/O} systems and
review of historical machines",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "5--15",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cousins:1989:DCR,
author = "R. Cousins",
title = "{DMA} considerations on {RISC} workstations",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "16--23",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Katz:1989:PHP,
author = "R. H. Katz",
title = "A project on high performance {I/O} subsystems",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "24--31",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dibble:1989:BSB,
author = "P. C. Dibble and M. L. Scott",
title = "Beyond striping: the bridge multiprocessor file
system",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "32--39",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reddy:1989:SPD,
author = "A. L. N. Reddy and P. Banerjee",
title = "A study of parallel disk organizations",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "40--47",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1989:MRT,
author = "J. M. Smith and G. Q. {Maguire, Jr.}",
title = "Measured response times for page-sized fetches on a
network",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "48--54",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wolman:1989:ISI,
author = "B. Wolman and T. M. Olson",
title = "{IOBENCH}: a system independent {IO} benchmark",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "55--70",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oslon:1989:DAP,
author = "T. M. Olson",
title = "Disk array performance in a random {IO} environment",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "71--77",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wolman:1989:ASB,
author = "B. L. Wolman",
title = "An analysis of server-based locking",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "78--82",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Debaere:1989:IPC,
author = "E. H. Debaere",
title = "Instruction-path coprocessing to solve some {RISC}
problems",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "83--94",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seebauer:1989:MCEb,
author = "H. Seebauer",
title = "A memory controller executing segment operations in
time {$ O(1) $}",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "95--102",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chiu:1989:RLF,
author = "P. K. Chiu",
title = "Representation of logic functions by {\tt if--then}
clauses",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "103--107",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Baleanu:1989:ECC,
author = "C. Baleanu and D. Tomescu",
title = "Embedding computers in a cellular array",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "108--115",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lass:1989:HES,
author = "S. Lass",
title = "On hardware enhanced 80386 software emulation,
compiled emulation, a program distribution language,
and pack computers",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "5",
pages = "116--118",
month = sep,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:26 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Litaize:1989:MSM,
author = "Daniel Litaize and Omar Hammami and Mustapha Lalam and
Abdelaziz Mzoughi and Pascal Sainrat",
title = "Multiprocessors with a serial multiport memory and a
pseudo crossbar of serial links used as a
processor-memory switch",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "8--21",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fritsch:1989:DSM,
author = "G. Fritsch and W. Henning and H. Hessenauer and R. Klar
and C. U. Linster and C. W. Oehlrich and P. Schlenk and
J. Volkert",
title = "Distributed shared memory multiprocessor architecture
{MEMSY} for high performance parallel computations",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "22--35",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mendelson:1989:SCC,
author = "A. Mendelson and D. K. Pradhan and A. D. Singh",
title = "A single cached copy data coherence scheme for
multiprocessor systems",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "36--49",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Feitelson:1989:AMU,
author = "Dror G. Feitelson and Larry Rudolph",
title = "Architecture for a multi-user general-purpose parallel
system",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "50--56",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Quammen:1989:RWA,
author = "D. Quammen and D. R. Miller and D. Tabak",
title = "Register window architecture for multitasking
applications",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "57--66",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rosenberg:1989:EEI,
author = "Arnold Rosenberg",
title = "Efficient emulations of interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "67--79",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Scherson:1989:DPC,
author = "Isaac D. Scherson and Peter F. Corbett",
title = "Description and performance of a class of orthogonal
multiprocessor networks",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "80--90",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{David:1989:EIB,
author = "Ilana David and Ran Ginosar and Michael Yoeli",
title = "An efficient implementation of {Boolean} functions and
finite state machine as self-timed circuit",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "91--104",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dollan:1989:CSP,
author = "Apostolos Dollas and Robert F. Krick",
title = "The case for the sustained performance computer
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "129--136",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:1989:WSP,
author = "Eric E. Johnson",
title = "Working set prefetching for cache memories",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "137--141",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1989:MPC,
author = "K. e H. Lee and C. H. Lam",
title = "Message-passing controller for a shared-memory
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "142--149",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1989:LCF,
author = "Tsong-Chih Hsu and Ling-Yang Kung",
title = "Logic and conflict-free vector addresses",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "150--153",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1989:AGU,
author = "Tsong-Chih Hsu and Ling-Yang Kung",
title = "An address generation unit for array accessing",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "154--160",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1989:HMP,
author = "Tsong-Chih Hsu and Ling-Yang Kung",
title = "A hardware mechanism for priority queue",
journal = j-COMP-ARCH-NEWS,
volume = "17",
number = "6",
pages = "162--169",
month = dec,
year = "1989",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dvorak:1990:MAS,
author = "V. Dvorak",
title = "Microsequencer architecture supporting arbitrary
branching up to {$ 2^m $} targets",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "9--9",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dongarra:1990:PVC,
author = "Jack J. Dongarra",
title = "Performance of various computers using standard linear
equations software",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "17--17",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1990:CFO,
author = "Tsong-Chih Hsu and Ling-Yang Kung",
title = "A comment on {``A Fetch-and-Op Implementation for
Parallel Computers''}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "32--32",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cousins:1990:NAC,
author = "Robert Cousins",
title = "A novel approach to character interfaces",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "35--35",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cousins:1990:RPI,
author = "Robert Cousins",
title = "A reentrant peripheral interface",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "43--43",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anderson:1990:ACS,
author = "Noel W. Anderson",
title = "Amorphous computer system architecture: a preliminary
look",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "51--51",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oyang:1990:CEA,
author = "Yen-Jen Oyang and Bor-Ting Chang and Shu-May Lin",
title = "A cost-effective approach to implement a long
instruction word microprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "59--59",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fritsch:1990:PBA,
author = "C. Fritsch and T. S{\'a}nchez and J. Anaya",
title = "Primitive based architectures",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "73--73",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lorin:1990:MRC,
author = "Harold Lorin",
title = "A model for recentralization of computing:
(distributed processing comes home)",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "81--81",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Teodosiu:1990:CTD,
author = "Dan Teodosiu",
title = "Computing in three dimensions",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "99--99",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Frazier:1990:ASM,
author = "Gary Frazier",
title = "{Ariel}: a scalable multiprocessor for the simulation
of neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "107--107",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Colwell:1990:BRH,
author = "Robert P. Colwell",
title = "Book review: {{\em High-Level Language Computer
Architecture\/}} edited by {Veljko Milutinovic
(Computer Science Press, 1989)}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "120--122",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parhami:1990:BRA,
author = "Behrooz Parhami",
title = "Book review: {{\em Advanced Research in VLSI}}, edited
by {Charles L. Seitz (The MIT Press, Cambridge, MA,
1989, 373 pp.)}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "1",
pages = "122--123",
month = mar,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matthes:1990:HRG,
author = "Wolfgang Matthes",
title = "Hardware {Resources}: a generalizing view on computer
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "2",
pages = "7--14",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
%%% Second author is P. Michael Farmwald: leading initial, then the given name.
@Article{Rauchwerger:1990:MFP,
author = "Lawrence Rauchwerger and P. Michael Farmwald",
title = "A multiple floating point coprocessor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "2",
pages = "15--24",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Glew:1990:SCT,
  author          = {Andy Glew and Wen-Mei Hwu},
  title           = {Snoopy cache test-and-test-and-set without excessive
                     bus contention},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {25--32},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Higbee:1990:QEC,
  author          = {Lee Higbee},
  title           = {Quick and easy cache performance analysis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {33--44},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Park:1990:ISF,
  author          = {Arvin Park and Jeffrey C. Becker and Richard J.
                     Lipton},
  title           = {{IOStone}: a synthetic file system benchmark},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {45--52},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Pnevmatikatos:1990:CPI,
  author          = {Dionisios N. Pnevmatikatos and Mark D. Hill},
  title           = {Cache performance of the integer {SPEC} benchmarks on
                     a {RISC}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {53--68},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ruighaver:1990:MND,
  author          = {A. B. Ruighaver},
  title           = {A modular network for dense optical interconnection of
                     processing elements},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {69--75},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DeGloria:1990:VVI,
  author          = {Alessandro {De Gloria}},
  title           = {{VISA}: a variable instruction set architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {76--84},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Williams:1990:ADR,
  author          = {Fleur L. Williams and Gordon B. Steven},
  title           = {Address and data register separation on the {M68000}
                     family},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {2},
  pages           = {85--89},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:46 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Adve:1990:WON,
  author          = {Sarita V. Adve and Mark D. Hill},
  title           = {Weak ordering---a new definition},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {2--14},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gharachorloo:1990:MCE,
  author          = {Kourosh Gharachorloo and Daniel Lenoski and James
                     Laudon and Phillip Gibbons and Anoop Gupta and John
                     Hennessy},
  title           = {Memory consistency and event ordering in scalable
                     shared-memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {15--26},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:1990:SMC,
  author          = {Joonwon Lee and Umakishore Ramachandran},
  title           = {Synchronization with multiprocessor caches},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {27--37},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chuang:1990:DPA,
  author          = {Po-Jen Chuang and Nian-Feng Tzeng},
  title           = {Dynamic processor allocation in hypercube computers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {40--49},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% The network has r squared ports on each side (it is built from r-by-r
%%% switches), so the title uses superscripts, not subscripts.
@Article{Youssef:1990:NAF,
author = "Abdou Youssef and Bruce Arden",
title = "A new approach to fast control of $ r^2 \times r^2 $
$3$-stage {Benes} networks of $ r \times r $ crossbar
switches",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3a",
pages = "50--59",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dally:1990:VCF,
  author          = {William J. Dally},
  title           = {Virtual-channel flow control},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {60--68},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Borkar:1990:SSM,
  author          = {Shekhar Borkar and Robert Cohn and George Cox and
                     Thomas Gross and H. T. Kung and Monica Lam and Margie
                     Levine and Brian Moore and Wire Moore and Craig
                     Peterson and Jim Susman and Jim Sutton and John
                     Urbanski and Jon Webb},
  title           = {Supporting systolic and memory communication in
                     {iWarp}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {70--81},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Papadopoulos:1990:MET,
  author          = {Gregory M. Papadopoulos and David E. Culler},
  title           = {{Monsoon}: an explicit token-store architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {82--91},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Annaratone:1990:KPP,
  author          = {Marco Annaratone and Marco Fillo and Kiyoshi
                     Nakabayashi and Marc Viredaz},
  title           = {The {K2} parallel processor: architecture and hardware
                     implementation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {92--101},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Agarwal:1990:APA,
  author          = {Anant Agarwal and Beng-Hong Lim and David Kranz and
                     John Kubiatowicz},
  title           = {{APRIL}: a processor architecture for
                     multiprocessing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {104--114},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bisiani:1990:PDS,
  author          = {Roberto Bisiani and Mosur Ravishankar},
  title           = {{PLUS}: a distributed shared-memory system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {115--124},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bennett:1990:ASC,
  author          = {John K. Bennett and John B. Carter and Willy
                     Zwaenepoel},
  title           = {Adaptive software cache management for distributed
                     shared memory architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {125--134},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ditzel:1990:BSV,
  author          = {David R. Ditzel and John L. Hennessy and Bernie Rudin
                     and Alan Jay Smith and Stephen L. Squires and Zeke
                     Zalcstein},
  title           = {Big science versus little science---do you have to
                     build it? (panel session)},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {136--136},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{OKrafka:1990:EET,
  author          = {Brian W. O'Krafka and A. Richard Newton},
  title           = {An empirical evaluation of two memory-efficient
                     directory methods},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {138--147},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lenoski:1990:DBC,
  author          = {Daniel Lenoski and James Laudon and Kourosh
                     Gharachorloo and Anoop Gupta and John Hennessy},
  title           = {The directory-based cache coherence protocol for the
                     {DASH} multiprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {148--159},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Przybylski:1990:PIB,
  author          = {Steven Przybylski},
  title           = {The performance impact of block sizes and fetch
                     strategies},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {160--169},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Alpert:1990:PCL,
  author          = {D. Alpert and A. Averbuch and O. Danieli},
  title           = {Performance comparison of load\slash store and
                     symmetric instruction set architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {172--181},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Davidson:1990:RCB,
  author          = {Jack W. Davidson and David B. Whalley},
  title           = {Reducing the cost of branches by using registers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {182--191},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Love:1990:ISV,
  author          = {Carl E. Love and Harry F. Jordan},
  title           = {An investigation of static versus dynamic scheduling},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {192--201},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bhandarkar:1990:VVA,
  author          = {Dileep Bhandarkar and Richard Brunner},
  title           = {{VAX} vector architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {204--215},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Horst:1990:MII,
  author          = {Robert W. Horst and Richard L. Harris and Robert L.
                     Jardine},
  title           = {Multiple instruction issue in the {NonStop Cyclone}
                     processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {216--226},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Symmetry names the multiprocessor, so it is capitalized and braced to
%%% survive bibliography styles that lowercase titles.
@Article{Thakkar:1990:POA,
author = "Shreekant S. Thakkar and Mark Sweiger",
title = "Performance of an {OLTP} application on {Symmetry}
multiprocessor system",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3a",
pages = "228--238",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1990:ISG,
  author          = {Ding-Kai Chen and Hong-Men Su and Pen-Chung Yew},
  title           = {The impact of synchronization and granularity on
                     parallel systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {239--248},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bugge:1990:TDS,
  author          = {H{\aa}kon O. Bugge and Ernst H. Kristiansen and
                     Bj{\o}rn O. Bakka},
  title           = {Trace-driven simulations for a two-level cache design
                     in open bus systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {250--259},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hsu:1990:PMT,
  author          = {Jiun-Ming Hsu and Prithviraj Banerjee},
  title           = {Performance measurement and trace driven simulation of
                     parallel {CAD} and numeric applications on a hypercube
                     multicomputer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {260--269},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Borg:1990:GAV,
  author          = {Anita Borg and R. E. Kessler and David W. Wall},
  title           = {Generation and analysis of very long address traces},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {270--279},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Holmer:1990:FPE,
  author          = {Bruce K. Holmer and Barton Sano and Michael Carlton
                     and Peter {Van Roy} and Ralph Haygood and
                     William R. Bush and Alvin M. Despain and
                     Joan M. Pendleton and Tep Dobry},
  title           = {Fast {Prolog} with an extended general purpose
                     architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {282--291},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Flat Concurrent Prolog is the name of a language; the whole name is
%%% capitalized and braced so that styles do not recase it.
@Article{Alkalaj:1990:ASM,
author = "Leon Alkalaj and Tom{\'a}s Lang and Milo{\v{s}}
Ercegovac",
title = "Architectural support for the management of
tightly-coupled fine-grain goals in {Flat Concurrent
Prolog}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3a",
pages = "292--301",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ho:1990:BAD,
  author          = {Samuel Ho and Lawrence Snyder},
  title           = {Balance in architectural design},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {302--310},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Perfect names the benchmark suite (it is not the adjective), so it is
%%% capitalized and braced against title recasing.
@Article{Reddy:1990:SBP,
author = "A. L. Narasimha Reddy and Prithviraj Banerjee",
title = "A study of {I/O} behavior of {Perfect} benchmarks on a
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3a",
pages = "312--321",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1990:MPS,
  author          = {Peter M. Chen and David A. Patterson},
  title           = {Maximizing performance in a striped disk array},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {322--331},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Shin:1990:DAH,
  author          = {Kang G. Shin and Greg Dykema},
  title           = {A distributed {I/O} architecture for {HARTS}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {332--342},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Smith:1990:BBS,
  author          = {Michael D. Smith and Monica S. Lam and Mark A.
                     Horowitz},
  title           = {Boosting beyond static scheduling in a superscalar
                     processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {344--354},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Taylor:1990:TSL,
  author          = {George Taylor and Peter Davies and Michael Farmwald},
  title           = {The {TLB} slice---a low-cost high-speed address
                     translation mechanism},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {355--363},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Jouppi:1990:IDM,
  author          = {Norman P. Jouppi},
  title           = {Improving direct-mapped cache performance by the
                     addition of a small fully-associative cache and
                     prefetch buffers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3a},
  pages           = {364--373},
  month           = jun,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Panelist surnames are Sohi (Gurindar S.) and Patt (Yale).
@Article{Davidson:1990:BTO,
author = "Edward S. Davidson and Gurindar S. Sohi and Joseph A.
Fisher and Greg Grohoski and Yale Patt and J. E. Smith
and David R. Stiles",
title = "Better than one operation per clock (panel): vectors,
{VLIW}, and superscalar",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3a",
pages = "376--376",
month = jun,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Alverson:1990:TCS,
  author          = {Robert Alverson and David Callahan and Daniel Cummings
                     and Brian Koblenz and Allan Porterfield and Burton
                     Smith},
  title           = {The {Tera} computer system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {1--6},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hwang:1990:ORB,
  author          = {K. Hwang and M. Dubois and D. K. Panda and S. Rao and
                     S. Shang and A. Uresin and W. Mao and H. Nair and M.
                     Lytwyn and F. Hsieh and J. Liu and S. Mehrotra and C.
                     M. Cheng},
  title           = {{OMP}: a {RISC}-based multiprocessor using
                     orthogonal-access memories and multiple spanning
                     buses},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {7--22},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dai:1990:BAS,
  author          = {Kechang Dai and Wolfgang K. Giloi},
  title           = {A basic architecture supporting {LGDG} computation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {23--33},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Min:1990:ECS,
  author          = {Sang Lyul Min and Jean-Loup Baer and Hyoung-Joo Kim},
  title           = {An efficient caching support for critical sections in
                     large-scale shared-memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {34--47},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nagashima:1990:IFA,
  author          = {Umpei Nagashima and Fumio Nishimoto and Takashi
                     Shibata and Hiroshi Itoh and Minoru Gotoh},
  title           = {An improvement of {I/O} function for auxiliary
                     storage: parallel {I/O} for a large scale
                     supercomputing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {48--59},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tzeng:1990:AVH,
  author          = {Nian-Feng Tzeng},
  title           = {Analysis of a variant hypercube topology},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {60--70},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{vanderHouwen:1990:POS,
  author          = {P. J. van der Houwen and B. P. Sommeijer},
  title           = {Parallel {ODE} solvers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {71--81},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% The colon separates the main title from the list of the three machines.
@Article{Dayde:1990:UPL,
author = "M. J. Dayd{\'e} and I. S. Duff",
title = "Use of parallel level 3 {BLAS} in {LU} factorization
on three vector multiprocessors: the {ALLIANT FX/80},
the {CRAY-2}, and the {IBM 3090 VF}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "82--95",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Houstis:1990:ENS,
  author          = {E. N. Houstis and J. R. Rice and N. P. Chrisochoides
                     and H. C. Karathanasis and P. N. Papachiou and M. K.
                     Samartzis and E. A. Vavalis and Ko Yang Wang and S.
                     Weerawarana},
  title           = {{//ELLPACK}: a numerical simulation programming
                     environment for parallel {MIMD} machines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {18},
  number          = {3b},
  pages           = {96--107},
  month           = sep,
  year            = {1990},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:03 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Christara:1990:SCP,
author = "Christina C. Christara",
title = "{Schur} complement preconditioned conjugate gradient
methods for spline collocation equations",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "108--120",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chung:1990:COP,
author = "Kuo-Liang Chung and Ferng-Ching Lin and Wen-Chin
Chen",
title = "Cost-optimal parallel {B}-spline interpolations",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "121--131",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gallivan:1990:SGS,
author = "K. Gallivan and A. Sameh and Z. Zlatev",
title = "Solving general sparse linear systems using conjugate
gradient-type methods",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "132--139",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuba:1990:DCD,
author = "Toshitsugu Yuba and Toshio Shimada and Yoshinori
Yamaguchi and Kei Hiraki and Shuichi Sakai",
title = "Dataflow computer development in {Japan}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "140--147",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sarkar:1990:PPO,
author = "Vivek Sarkar and David Cann",
title = "{POSC}---a partitioning and optimizing {SISAL}
compiler",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "148--164",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bodin:1990:LOH,
author = "Fran{\c{c}}ois Bodin and Fran{\c{c}}ois Charot",
title = "Loop optimization for horizontal microcoded machines",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "164--176",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tang:1990:CTD,
author = "Peiyi Tang and Pen-Chung Yew and Chuan-Qi Zhu",
title = "Compiler techniques for data synchronization in nested
parallel loops",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "177--186",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hudak:1990:CTD,
author = "David E. Hudak and Santosh G. Abraham",
title = "Compiler techniques for data partitioning of
sequentially iterated parallel loops",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "187--200",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Klappholz:1990:PAA,
author = "David Klappholz and Kleanthis Psarris and Xiangyun
Kong",
title = "On the perfect accuracy of an approximate subscript
analysis test",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "201--212",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malony:1990:HBP,
author = "Allen D. Malony and Daniel A. Reed",
title = "A hardware-based performance monitor for the {Intel
iPSC/2} hypercube",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "213--226",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dimpsey:1990:PDD,
author = "R. T. Dimpsey and R. K. Iyer",
title = "Performance degradation due to multiprogramming and
system overheads in real workloads: case study on a
shared memory multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "227--238",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Saad:1990:SBP,
author = "Youcef Saad and Harry A. G. Wijshoff",
title = "{SPARK}: a benchmark package for sparse computations",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "239--253",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cybenko:1990:SPE,
author = "George Cybenko and Lyle Kipp and Lynn Pointer and
David Kuck",
title = "Supercomputer performance evaluation and the {Perfect
Benchmarks}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "254--266",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Noor:1990:SLS,
author = "Ahmed K. Noor and Jeanne M. Peters",
title = "Strategies for large-scale structural problems on
high-performance computers",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "267--280",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zecca:1990:ECV,
author = "V. Zecca and A. Kamel",
title = "Elastodynamics on clustered vector multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "281--290",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eijkhout:1990:IPP,
author = "Victor Eijkhout",
title = "Implementation of $5$-point\slash $9$-point
multi-level methods on hypercube architectures",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "291--295",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1990:SBV,
author = "Philip C. Chen",
title = "Supercomputer-based visualization systems used for
analyzing output data of a numerical weather prediction
model",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "296--309",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Takahashi:1990:PAW,
author = "Yoshizo Takahashi and Shigetaka Sasaki",
title = "Parallel automated wire-routing with a number of
competing processors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "310--317",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chan:1990:HAA,
author = "Tony F. Chan",
title = "Hierarchical algorithms and architectures for parallel
scientific computing",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "318--329",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1990:IDA,
author = "Kevin Smith and Bill Appelbe and Kurt Stirewalt",
title = "Incremental dependence analysis for interactive
parallelization",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "330--341",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ruhl:1990:PFC,
author = "Roland R{\"u}hl and Marco Annaratone",
title = "Parallelization of {FORTRAN} code on
distributed-memory parallel processors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "342--353",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gornish:1990:CDD,
author = "Edward H. Gornish and Elana D. Granston and Alexander
V. Veidenbaum",
title = "Compiler-directed data prefetching in multiprocessors
with memory hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "354--368",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gao:1990:TEF,
author = "Guang R. Gao and Herbert H. J. Hum and Yue-Bong Wong",
title = "Towards efficient fine-grain software pipelining",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "369--379",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Andre:1990:PSM,
author = "Fran{\c{c}}oise Andr{\'e} and Jean-Louis Pazat and
Henry Thomas",
title = "{Pandore}: a system to manage data distribution",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "380--388",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fatoohi:1990:VPA,
author = "Rod A. Fatoohi",
title = "Vector performance analysis of the {NEC SX-2}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "389--400",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bodin:1990:PEP,
author = "Fran{\c{c}}ois Bodin and Daniel Windheiser and William
Jalby and Daya Atapattu and Mannho Lee and Dennis
Gannon",
title = "Performance evaluation and prediction for parallel
algorithms on the {BBN GP1000}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "401--413",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brochard:1990:DAH,
author = "Luigi Brochard and Alex Freau",
title = "Designing algorithms on hierarchical memory
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "414--427",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bucher:1990:ACM,
author = "Ingrid Y. Bucher and Donald A. Calahan",
title = "Access conflicts in multiprocessor memories: queueing
models and simulation studies",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "428--438",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Luque:1990:ITD,
author = "Emilio Luque and Ana Ripoll and Porfidio Hern{\'a}ndez
and Tom{\'a}s Margalef",
title = "Impact of task duplication on static-scheduling
performance in multiprocessor systems with variable
execution-time tasks",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "439--446",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gerasoulis:1990:CTG,
author = "Apostolos Gerasoulis and Sesh Venugopal and Tao Yang",
title = "Clustering task graphs for message passing
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "447--456",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Paalvast:1990:MPP,
author = "Edwin M. Paalvast and Arjan J. van Gemund and Henk J.
Sips",
title = "A method for parallel program generation with an
application to the {Booster} language",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "457--469",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tsoukarellas:1990:RTS,
author = "M. A. Tsoukarellas and T. S. Papatheodorou",
title = "A run time support system for multiprocessor
machines",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "470--478",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hey:1990:STP,
author = "Anthony J. G. Hey",
title = "Supercomputing with transputers---past, present and
future",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "3b",
pages = "479--489",
month = sep,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:03 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:1990:EA,
author = "Burton Smith",
title = "The end of architecture",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "10--17",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hill:1990:WS,
author = "Mark D. Hill",
title = "What is scalability?",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "18--21",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Laplante:1990:NSI,
author = "P. A. Laplante",
title = "A novel single instruction computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "22--26",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ginosar:1990:PAP,
author = "Ran Ginosar and Nick Michell",
title = "On the potential of asynchronous pipelined
processors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "27--34",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oyang:1990:EEA,
author = "Yen-Jen Oyang and Chun-Hung Wen and Yu-Fen Chen and
Shu-May Lin",
title = "The effect of employing advanced branching mechanisms
in superscalar processors",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "35--52",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Deville:1990:LCU,
author = "Yannick Deville",
title = "A low-cost usage-based replacement algorithm for cache
memories",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "52--58",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gunther:1990:HSM,
author = "Bernard K. Gunther",
title = "A high speed mechanism for short branches",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "59--61",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McLaughlin:1990:DFD,
author = "Robert McLaughlin",
title = "Design for fast {DSP} machine",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "62--66",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Joerg:1990:SPN,
author = "Werner B. Joerg",
title = "A subclass of {Petri Nets} as design abstraction for
parallel architectures",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "67--77",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1990:UN,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "80--89",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Langdon:1990:BRH,
author = "Glen G. {Langdon, Jr.}",
title = "Book review: {{\em Highly Parallel Computing\/}} by
{George Almasi and Allan Gottlieb (Benjamin\slash
Cummings, 1989)}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "90--90",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Langdon:1990:BRS,
author = "Glen G. {Langdon, Jr.}",
title = "Book review: {{\em Solving Problems on Concurrent
Processors, Vol II: Software for Concurrent
Processors\/}} by {I. Angus, G. Fox, J. Kim, and D.
Walker (Prentice-Hall, 1990)}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "90--91",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dikotter:1990:BRD,
author = "Marc Dikotter",
title = "Book review: {{\em The Definition of Standard ML\/}}
by {R. Milner, M. Tofte, R. Harper}",
journal = j-COMP-ARCH-NEWS,
volume = "18",
number = "4",
pages = "91--91",
month = dec,
year = "1990",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Leighton:1991:SPS,
author = "F. T. Leighton",
title = "Selected Papers from the {Symposium on Parallel
Algorithms and Architectures}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "5--5",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ngai:1991:FAR,
author = "John Y. Ngai and Charles L. Seitz",
title = "A framework for adaptive routing in multicomputer
networks",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "6--14",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Beigel:1991:PNI,
author = "Richard Beigel and Clyde P. Kruskal",
title = "Processor networks and interconnection networks
without long wires (extended abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "15--24",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Annexstein:1991:FTH,
author = "Fred Annexstein",
title = "Fault tolerance in hypercube-derivative networks
(preliminary version)",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "25--34",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fujimoto:1991:VTM,
author = "Richard M. Fujimoto",
title = "The {Virtual Time Machine}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "35--44",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bilardi:1991:OVA,
author = "Gianfranco Bilardi and Scot W. Hornick and Majid
Sarrafzadeh",
title = "Optimal {VLSI} architectures for multidimensional
{DFT} (preliminary version)",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "45--52",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomborson:1991:SIM,
author = "Clark D. Thomborson and Belle W.-Y. Wei",
title = "Systolic implementations of a move-to-front text
compressor",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "53--60",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Knight:1991:TLL,
author = "Thomas F. {Knight, Jr.}",
title = "Technologies for low latency interconnection
switches",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "61--68",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Herbordt:1991:MPA,
author = "Martin C. Herbordt and Charles C. Weems and James C.
Corbett",
title = "Message-passing algorithms for a {SIMD} torus with
coteries",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "69--78",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Konstantinidou:1991:CRP,
author = "S. Konstantinidou and L. Snyder",
title = "The chaos router: a practical application of
randomization in network routing",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "79--88",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bruck:1991:RAE,
author = "Jehoshua Bruck and Robert Cypher and Danny Soroker",
title = "Running algorithms efficiently on faulty hypercubes
(extended abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "89--96",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nishimura:1991:ASM,
author = "Naomi Nishimura",
title = "Asynchronous shared memory parallel computation
(preliminary version)",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "97--105",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shand:1991:HSL,
author = "M. Shand and P. Bertin and J. Vuillemin",
title = "Hardware speedups in long integer multiplication",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "106--113",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thapar:1991:CCL,
author = "Manu Thapar and Bruce Delagi",
title = "Cache coherence for large scale shared memory
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "114--119",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Grabienski:1991:FFS,
author = "Peter Grabienski",
title = "{FLIP-FLOP}: a stack-oriented multiprocessing system",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "120--127",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Price:1991:TAD,
author = "Camille C. Price",
title = "Task allocation in data flow multiprocessors: an
annotated bibliography",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "128--134",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Adams:1991:PPP,
author = "Rod Adams and Gordon Steven",
title = "A parallel pipelined processor with conditional
instruction execution",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "135--142",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1991:UNa,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "146--150",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hilton:1991:BRS,
author = "Michael L. Hilton",
title = "Book review: {{\em Systems Programming in Parallel
Logic Languages\/}} by {Ian Foster (Prentice Hall,
1990)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "151--151",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anthony:1991:BRT,
author = "Keith Anthony",
title = "Book review: {{\em Technology Projection Modeling of
Future Computer Systems\/}} by {Al Cutaia
(Prentice-Hall, 1990)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "152--153",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schneck:1991:BRO,
author = "Paul B. Schneck",
title = "Book review: {{\em Optimizing FORTRAN Programs\/}} by
{C. F. Schofield (Halsted Press, 1989)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "153--154",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bernecky:1991:BRMa,
author = "Robert Bernecky",
title = "Book review: {{\em Multiprocessors\/}} by {Daniel
Tabak (Prentice Hall, Englewood Cliffs, NJ)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "154--156",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bernecky:1991:BRMb,
author = "Robert Bernecky",
title = "Book review: {{\em Multiprocessor Performance\/}} by
{Erol Gelenbe (J. Wiley \& Sons, Chichester,
England)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "156--157",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fulcher:1991:BRN,
author = "John Fulcher",
title = "Book review: {{\em Neural Net Applications and
Products\/}} by {Richard K. Miller, Terri C. Walker,
and Anne M. Ryan (SEAI Technical Publications, 1990)}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "1",
pages = "157--158",
month = mar,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wolfe:1991:VIS,
author = "Andrew Wolfe and John P. Shen",
title = "A variable instruction stream extension to the {VLIW}
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "2--14",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Katevenis:1991:RBP,
author = "Manolis Katevenis and Nestoras Tzartzanis",
title = "Reducing the branch penalty by rearranging
instructions in a double-width memory",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "15--27",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1991:FPP,
author = "Roland L. Lee and Alex Y. Kwok and Fay{\'e} A.
Briggs",
title = "The floating point performance of a superscalar
{SPARC} processor",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "28--37",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Callahan:1991:SP,
author = "David Callahan and Ken Kennedy and Allan Porterfield",
title = "Software prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "40--52",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sohi:1991:HBD,
author = "Gurindar S. Sohi and Manoj Franklin",
title = "High-bandwidth data memory systems for superscalar
processors",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "53--62",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lam:1991:CPO,
author = "Monica D. Lam and Edward E. Rothberg and Michael E.
Wolf",
title = "The cache performance and optimizations of blocked
algorithms",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "63--74",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mogul:1991:ECS,
author = "Jeffrey C. Mogul and Anita Borg",
title = "The effect of context switches on cache performance",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "75--84",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keppel:1991:PIF,
author = "David Keppel",
title = "A portable interface for on-the-fly instruction space
modification",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "86--95",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Appel:1991:VMP,
author = "Andrew W. Appel and Kai Li",
title = "Virtual memory primitives for user programs",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "96--107",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anderson:1991:IAO,
author = "Thomas E. Anderson and Henry M. Levy and Brian N.
Bershad and Edward D. Lazowska",
title = "The interaction of architecture and operating system
design",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "108--120",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bradlee:1991:IRA,
author = "David G. Bradlee and Susan J. Eggers and Robert R.
Henry",
title = "Integrating register allocation and instruction
scheduling for {RISCs}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "122--131",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Benitez:1991:CGS,
author = "Manuel E. Benitez and Jack W. Davidson",
title = "Code generation for streaming: an access\slash execute
mechanism",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "132--141",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bagrodia:1991:EIH,
author = "Rajive Bagrodia and Sharad Mathur",
title = "Efficient implementation of high-level parallel
programs",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "142--151",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mangione-Smith:1991:VRD,
author = "William Mangione-Smith and Santosh G. Abraham and
Edward S. Davidson",
title = "Vector register design for polycyclic vector
scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "154--163",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Culler:1991:FGP,
author = "David E. Culler and Anurag Sah and Klaus E. Schauser
and Thorsten von Eicken and John Wawrzynek",
title = "Fine-grain parallelism with minimal hardware support:
a compiler-controlled threaded abstract machine",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "164--175",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wall:1991:LIL,
author = "David W. Wall",
title = "Limits of instruction-level parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "176--188",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1991:PCP,
author = "Edward K. Lee and Randy H. Katz",
title = "Performance consequences of parity placement in disk
arrays",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "190--199",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cate:1991:CCC,
author = "Vincent Cate and Thomas Gross",
title = "Combining the concepts of compression and caching for
a two-level filesystem",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "200--211",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bolosky:1991:NPT,
author = "William J. Bolosky and Michael L. Scott and Robert P.
Fitzgerald and Robert J. Fowler and Alan L. Cox",
title = "{NUMA} policies and their relation to memory
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "212--221",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chaiken:1991:LDS,
author = "David Chaiken and John Kubiatowicz and Anant Agarwal",
title = "{LimitLESS} directories: a scalable cache coherence
scheme",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "224--234",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Min:1991:ECB,
author = "Sang L. Min and Jong-Deok Choi",
title = "An efficient cache-based access anomaly detection
scheme",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "235--244",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gharachorloo:1991:PEM,
author = "Kourosh Gharachorloo and Anoop Gupta and John
Hennessy",
title = "Performance evaluation of memory consistency models
for shared-memory multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "245--257",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Freudenthal:1991:PCF,
author = "Eric Freudenthal and Allan Gottlieb",
title = "Process coordination with fetch-and-increment",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "260--268",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mellor-Crummey:1991:SC,
author = "John M. Mellor-Crummey and Michael L. Scott",
title = "Synchronization without contention",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "269--278",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:1991:CRB,
author = "Douglas Johnson",
title = "The case for a read barrier",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "279--287",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cmelik:1991:AMS,
author = "Robert F. Cmelik and Shing I. Kong and David R. Ditzel
and Edmund J. Kelly",
title = "An analysis of {MIPS} and {SPARC} instruction set
utilization on the {SPEC} benchmarks",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "290--302",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hall:1991:PCA,
author = "C. Brian Hall and Kevin O'Brien",
title = "Performance characteristics of architectural features
of the {IBM RISC System\slash 6000}",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "303--309",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhandarkar:1991:PAC,
author = "Dileep Bhandarkar and Douglas W. Clark",
title = "Performance from architecture: comparing a {RISC} and
a {CISC} with similar hardware organization",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "2",
pages = "310--319",
month = apr,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{DeMara:1991:SPA,
author = "R. F. DeMara and D. I. Moldovan",
title = "The {SNAP-1} parallel {AI} prototype",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "2--11",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tan:1991:GEN,
author = "Wei Siong Tan and H. Russ and Cecil O. Alford",
title = "{GT-EP}: a novel high-performance real-time
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "13--21",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Higuchi:1991:IPA,
author = "Tetsuya Higuchi and Tatsumi Furuya and Kenichi Handa
and Naoto Takahashi and Hiroyasu Nishiyama and Akio
Kokubu",
title = "{IXM2}: a parallel associative processor",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "22--31",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kaeli:1991:BHT,
author = "David R. Kaeli and Philip G. Emma",
title = "Branch history table prediction of moving target
branches due to subroutine returns",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "34--42",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Klaiber:1991:ASC,
author = "Alexander C. Klaiber and Henry M. Levy",
title = "An architecture for software-controlled data
prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "43--53",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fu:1991:DPM,
author = "John W. C. Fu and Janak H. Patel",
title = "Data prefetching in multiprocessor vector cache
memories",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "54--63",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Harper:1991:RMC,
author = "D. T. {Harper III}",
title = "Reducing memory contention in shared memory
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "66--73",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rau:1991:PRI,
author = "B. Ramakrishna Rau",
title = "Pseudo-randomly interleaved memory",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "74--83",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Li:1991:EMS,
author = "Kai Li and Karin Petersen",
title = "Evaluation of memory system extensions",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "84--93",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dowd:1991:HPI,
author = "Patrick W. Dowd",
title = "High performance interprocessor communication through
optical wavelength division multiple access channels",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "96--105",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Landin:1991:RFI,
author = "Anders Landin and Erik Hagersten and Seif Haridi",
title = "Race-free interconnection networks and multiprocessor
consistency",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "106--115",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:1991:DFM,
author = "Xiaola Lin and Lionel M. Ni",
title = "Deadlock-free multicast wormhole routing in
multicomputer networks",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "3",
pages = "116--125",
month = may,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farrens:1991:DBR,
  author          = {Matthew Farrens and Arvin Park},
  title           = {Dynamic base register caching: a technique for
                     reducing address bus width},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {128--137},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Olukotun:1991:ICH,
  author          = {O. A. Olukotun and T. N. Mudge and R. B. Brown},
  title           = {Implementing a cache for a high-performance {GaAs}
                     microprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {138--147},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kurian:1991:CPE,
  author          = {Lizyamma Kurian and Paul T. Hulina and Lee D. Coraor
                     and Dhamir N. Mannai},
  title           = {Classification and performance evaluation of
                     instruction buffering techniques},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {150--159},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nakajima:1991:OVS,
  author          = {Masaitsu Nakajima and Hiraku Nakano and Yasuhiro
                     Nakakura and Tadahiro Yoshida and Yoshiyuki Goi and
                     Yuji Nakai and Reiji Segawa and Takeshi Kishida and
                     Hiroshi Kadota},
  title           = {{OHMEGA}: a {VLSI} superscalar processor architecture
                     for numerical applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {160--168},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Vajapeyam:1991:ESC,
  author          = {Sriram Vajapeyam and Gurindar S. Sohi and Wei-Chung
                     Hsu},
  title           = {An empirical study of the {CRAY Y-MP} processor using
                     the {Perfect Club} benchmarks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {170--179},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stephens:1991:ILP,
  author          = {Chriss Stephens and Bryce Cogswell and John Heinlein
                     and Gregory Palmer and John P. Shen},
  title           = {Instruction level profiling and evaluation of the
                     {IBM\slash 6000}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {180--189},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dimpsey:1991:PPT,
  author          = {R. T. Dimpsey and R. K. Iyer},
  title           = {Performance prediction and tuning on a
                     multiprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {190--199},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Oehlrich:1991:PEC,
  author          = {C. W. Oehlrich and A. Quick},
  title           = {Performance evaluation of a communication system for
                     transputer-networks based on monitored event traces},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {202--211},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Konstantinidou:1991:CRA,
  author          = {S. Konstantinidou and L. Snyder},
  title           = {Chaos router: architecture and performance},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {212--221},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Shukla:1991:SPC,
  author          = {Shridhar B. Shukla and Dharma P. Agrawal},
  title           = {Scheduling pipelined communication in distributed
                     memory multiprocessors for real-time applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {222--231},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Adve:1991:DDR,
  author          = {Sarita V. Adve and Mark D. Hill and Barton P. Miller
                     and Robert H. B. Netzer},
  title           = {Detecting data races on weak memory systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {234--243},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Koldinger:1991:VTD,
  author          = {Eric J. Koldinger and Susan J. Eggers and Henry M.
                     Levy},
  title           = {On the validity of trace-driven simulation for
                     multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {244--253},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gupta:1991:CEL,
  author          = {Anoop Gupta and John Hennessy and Kourosh Gharachorloo
                     and Todd Mowry and Wolf-Dietrich Weber},
  title           = {Comparative evaluation of latency reducing and
                     tolerating techniques},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {254--263},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chang:1991:IAF,
  author          = {Pohua P. Chang and Scott A. Mahlke and William Y. Chen
                     and Nancy J. Warter and Wen-mei W. Hwu},
  title           = {{IMPACT}: an architectural framework for
                     multiple-instruction-issue processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {266--275},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Butler:1991:SIS,
  author          = {Michael Butler and Tse-Yu Yeh and Yale Patt and Mitch
                     Alsup and Hunter Scales and Michael Shebanow},
  title           = {Single instruction stream parallelism is greater than
                     two},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {276--286},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Melvin:1991:EFG,
  author          = {Stephen Melvin and Yale Patt},
  title           = {Exploiting fine-grained parallelism through a
                     combination of hardware and software techniques},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {287--296},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Adve:1991:CHS,
  author          = {Sarita V. Adve and Vikram S. Adve and Mark D. Hill and
                     Mary K. Vernon},
  title           = {Comparison of hardware and software cache coherence
                     schemes},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {298--308},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Simoni:1991:MPL,
  author          = {Richard Simoni and Mark Horowitz},
  title           = {Modeling the performance of limited pointers
                     directories for cache coherence},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {309--319},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Quammen:1991:FRM,
  author          = {Donna J. Quammen and D. Richard Miller},
  title           = {Flexible register management for sequential programs},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {320--329},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bradlee:1991:ERP,
  author          = {David G. Bradlee and Susan J. Eggers and Robert R.
                     Henry},
  title           = {The effect on {RISC} performance of register set size
                     and structure versus code generation strategy},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {330--339},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Papadopoulos:1991:MRV,
  author          = {Gregory M. Papadopoulos and Kenneth R. Traub},
  title           = {Multithreading: a revisionist view of dataflow
                     architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {342--351},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chiueh:1991:MTV,
  author          = {Tzi-cker Chiueh},
  title           = {Multi-threaded vectorization},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {352--361},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Farrens:1991:SAI,
  author          = {Matthew K. Farrens and Andrew R. Pleszkun},
  title           = {Strategies for achieving improved processor
                     throughput},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {362--369},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kagimasa:1991:ASM,
  author          = {Toyohiko Kagimasa and Kikuo Takahashi and Toshiaki
                     Mori and Seiichi Yoshizumi},
  title           = {Adaptive storage management for very large
                     virtual\slash real storage systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {372--379},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hall:1991:VVA,
  author          = {Judith S. Hall and Paul T. Robinson},
  title           = {Virtualizing the {VAX} architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {380--389},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Akella:1991:MMI,
  author          = {Janaki Akella and Daniel P. Siewiorek},
  title           = {Modeling and measurement of the impact of
                     {Input\slash Output} on system performance},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {3},
  pages           = {390--399},
  month           = may,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wilson:1991:PSP,
  author          = {Paul R. Wilson},
  title           = {Pointer swizzling at page fault time: efficiently
                     supporting huge address spaces on standard hardware},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {6--13},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kuga:1991:DDH,
  author          = {Morihiro Kuga and Kazuaki Murakami and Shinji Tomita},
  title           = {{DSNS} (dynamically-hazard-resolved
                     statically-code-scheduled, nonuniform superscalar): yet
                     another superscalar processor architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {14--29},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ponder:1991:PVA,
  author          = {Carl Ponder},
  title           = {Performance variation across benchmark suites},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {30--36},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Conte:1991:BSB,
  author          = {Thomas M. Conte and Wen-mei W. Hwu},
  title           = {A brief survey of benchmark usage in the architecture
                     community},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {37--44},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Morris:1991:CER,
  author          = {Todd D. Morris and Edward F. Gehringer},
  title           = {A cost-effective reliable multipath interconnection
                     network},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {45--65},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Laplante:1991:ICB,
  author          = {P. A. Laplante},
  title           = {An improved conditional branching scheme for a single
                     instruction computer architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {66--68},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DuBois:1991:DED,
  author          = {Andrew J. DuBois and John Rasure},
  title           = {Design and evaluation of a distributed asynchronous
                     {VLSI} crossbar switch controller for a packet switched
                     supercomputer network},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {69--79},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lass:1991:CCP,
  author          = {Stanley E. Lass},
  title           = {The compiler controlled pack cache and messaging},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {80--85},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ungerer:1991:MLP,
  author          = {Theo Ungerer and Eberhard Zehendner},
  title           = {A multi-level parallelism architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {86--93},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Matthes:1991:HMO,
  author          = {Wolfgang Matthes},
  title           = {How many operation units are adequate?},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {94--108},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cunha:1991:AMM,
  author          = {Alberto R. Cunha and Carlos N. Ribeiro and Jos{\'e} A.
                     Marques},
  title           = {The architecture of a memory management unit for
                     object-oriented systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {109--116},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Matloff:1991:AAS,
  author          = {Norman Matloff},
  title           = {An argument against scalable cache coherency},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {117--123},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Rodohan:1991:OAO,
  author          = {D. P. Rodohan and R. J. Glover},
  title           = {An overview of the {A} architecture for optimisation
                     problems in a logic programming environment},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {124--131},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wray:1991:TSD,
  author          = {Stuart C. Wray},
  title           = {Time-sequenced {DMA} for multimedia computers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {132--137},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ramamoorthy:1991:BMC,
  author          = {Ganesh Ramamoorthy and Alok N. Choudhary},
  title           = {A bibliography for multiprocessor cache memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {138--153},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Smith:1991:SBC,
  author          = {Alan Jay Smith},
  title           = {Second bibliography on {Cache} memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {154--182},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1991:UNb,
  author          = {Mark Thorson},
  title           = {{Usenet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {4},
  pages           = {185--191},
  month           = jun,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:06 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Patterson:1991:TGS,
  author          = {David A. Patterson},
  title           = {Towards guidelines for {SIGARCH} sponsored
                     conferences},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {7--7},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Maa:1991:TED,
  author          = {Yeong-Chang Maa and Dhiraj K. Pradhan and Dominique
                     Thi{\'e}baut},
  title           = {Two economical directory schemes for large-scale cache
                     coherent multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {10--10},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1991:UNc,
  author          = {Mark Thorson},
  title           = {{Usenet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {21--26},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ivanovic:1991:BRC,
  author          = {Vladimir G. Ivanovic},
  title           = {Book review: {{\em Computation Structures\/}} by
                     {Stephen A Ward and Robert H. Halstead, Jr. (MIT Press
                     or McGraw-Hill, 1990)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {27--29},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Krieger:1991:BRM,
  author          = {Moshe Krieger},
  title           = {Book review: {{\em Multiprocessors\/}} by {D. Tabak
                     (Prentice-Hall, 1990)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {27--29},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fulcher:1991:BRM,
  author          = {John Fulcher},
  title           = {Book review: {{\em The 68000 and 68020
                     Microprocessors: Hardware, Software and Interfacing
                     Techniques\/}} by {W. Triebel and A. Singh (Prentice
                     Hall, 1991)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {5},
  pages           = {29--30},
  month           = sep,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:26 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Baker:1991:PIS,
  author          = {Henry G. Baker},
  title           = {Precise instruction scheduling without a precise
                     machine model},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {6},
  pages           = {4--8},
  month           = dec,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:27 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{McLaughlin:1991:LAB,
  author          = {Robert McLaughlin},
  title           = {Look-ahead branching hardware},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {6},
  pages           = {9--11},
  month           = dec,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:27 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Beth:1991:RCI,
  author          = {Thomas Beth and Volker Hatz},
  title           = {A restricted crossbar implementation and its
                     applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {6},
  pages           = {12--16},
  month           = dec,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:27 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1991:UNd,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "19",
number = "6",
pages = "19--23",
month = dec,
year = "1991",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:27 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bernecky:1991:BRP,
  author          = {Robert Bernecky},
  title           = {Book review: {{\em Past, Present, Parallel: A Survey
                     of Available Parallel Computing Systems\/}} by {Arthur
                     Trew \& Greg Wilson (Eds.), (Springer-Verlag 1991)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {19},
  number          = {6},
  pages           = {24--25},
  month           = dec,
  year            = {1991},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:27 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Singh:1992:SSP,
  author          = {Jaswinder Pal Singh and Wolf-Dietrich Weber and Anoop
                     Gupta},
  title           = {{SPLASH}: {Stanford} parallel applications for
                     shared-memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {20},
  number          = {1},
  pages           = {5--44},
  month           = mar,
  year            = {1992},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:33 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Wajda:1992:SSP,
  author          = {Eligiusz Wajda},
  title           = {{SPIRE}: streaming processing with instructions release
                     element},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 1,
  pages           = {45--54},
  month           = mar,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:33 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Deville:1992:CRP,
  author          = {Yannick Deville and Jean Gobert},
  title           = {A class of replacement policies for medium and
                     high-associativity structures},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 1,
  pages           = {55--64},
  month           = mar,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:33 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Zucker:1992:PSM,
  author          = {Richard N. Zucker and Jean-Loup Baer},
  title           = {A performance study of memory consistency models},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {2--12},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Keleher:1992:LRC,
  author          = {Pete Keleher and Alan L. Cox and Willy Zwaenepoel},
  title           = {Lazy release consistency for software distributed shared
                     memory},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {13--21},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Gharachorloo:1992:HML,
  author          = {Kourosh Gharachorloo and Anoop Gupta and John Hennessy},
  title           = {Hiding memory latency using dynamic scheduling in
                     shared-memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {22--33},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Fernandes:1992:EBB,
  author          = {Edil S. T. Fernandes and Fernando M. B. Barbosa},
  title           = {Effects of building blocks on the performance of
                     super-scalar architecture},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {36--45},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Lam:1992:LCF,
  author          = {Monica S. Lam and Robert P. Wilson},
  title           = {Limits of control flow on parallelism},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {46--57},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Franklin:1992:ESW,
  author          = {Manoj Franklin and Gurindar S. Sohi},
  title           = {The expandable split window paradigm for exploiting
                     fine-grain parallelism},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {58--67},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Litaize:1992:TSM,
  author          = {Daniel Litaize and Abdelaziz Mzoughi and
                     Christine Rochange and Pascal Sainrat},
  title           = {Towards a shared-memory massively parallel
                     multiprocessor},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {70--79},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Stenstrom:1992:CPE,
  author          = {Per Stenstr{\"o}m and Truman Joe and Anoop Gupta},
  title           = {Comparative performance evaluation of cache-coherent
                     {NUMA} and {COMA} architectures},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {80--91},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Lenoski:1992:DPI,
  author          = {Daniel Lenoski and James Laudon and Truman Joe and
                     David Nakahira and Luis Stevens and Anoop Gupta and
                     John Hennessy},
  title           = {The {DASH} prototype: implementation and performance},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {92--103},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Intrater:1992:PED,
  author          = {Gideon Intrater and Ilan Spillinger},
  title           = {Performance evaluation of a decoded instruction cache
                     for variable instruction-length computers},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {106--113},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Chen:1992:SBS,
  author          = {J. Bradley Chen and Anita Borg and Norman P. Jouppi},
  title           = {A simulation based study of {TLB} performance},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {114--123},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Yeh:1992:AIT,
  author          = {Tse-Yu Yeh and Yale N. Patt},
  title           = {Alternative implementations of two-level adaptive branch
                     prediction},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {124--134},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Hirata:1992:EPA,
  author          = {Hiroaki Hirata and Kozo Kimura and Satoshi Nagamine and
                     Yoshiyuki Mochizuki and Akio Nishimura and
                     Yoshimori Nakase and Teiji Nishizawa},
  title           = {An elementary processor architecture with simultaneous
                     instruction issuing from multiple threads},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {136--145},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Sato:1992:TBP,
  author          = {Mitsuhisa Sato and Yuetsu Kodama and Shuichi Sakai and
                     Yoshinori Yamaguchi and Yasuhito Koumura},
  title           = {Thread-based programming for the {EM-4} hybrid dataflow
                     machine},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {146--155},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%% The machine described is named "*T"; the leading asterisk is part of
%% the name and is brace-protected together with the letter.
@Article{Nikhil:1992:MMP,
author = "R. S. Nikhil and G. M. Papadopoulos and Arvind",
title = "{*T}: a multithreaded massively parallel architecture",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "156--167",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Dubnicki:1992:ABS,
  author          = {Czarek Dubnicki and Thomas J. LeBlanc},
  title           = {Adjustable block size coherent caches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {170--180},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%% NOTE(review): title given in the plural ("caches") as in the ISCA 1992
%% proceedings listing; confirm against the publisher record.
@Article{Olukotun:1992:POP,
author = "Kunle Olukotun and Trevor Mudge and Richard Brown",
title = "Performance optimization of pipelined primary caches",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "181--190",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{McFarling:1992:CRD,
  author          = {Scott McFarling},
  title           = {Cache replacement with dynamic exclusion},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {191--200},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%% First author's given name is "Stephen" (was misspelled "Stephem").
@Article{Keckler:1992:PCI,
author = "Stephen W. Keckler and William J. Dally",
title = "Processor coupling: integrating compile time and
runtime scheduling for parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "202--213",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Boothe:1992:IMT,
  author          = {Bob Boothe and Abhiram Ranade},
  title           = {Improved multithreading techniques for hiding
                     communication latency in multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {214--223},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{DeGloria:1992:ILP,
  author          = {Alessandro {De Gloria} and Paolo Faraboschi},
  title           = {Instruction-level parallelism in {Prolog}: analysis and
                     architectural support},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {224--233},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Kurian:1992:MLE,
  author          = {Lizyamma Kurian and Paul T. Hulina and Lee D. Coraor},
  title           = {Memory latency effects in decoupled architectures with a
                     single data memory module},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {236--245},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Seznec:1992:IPS,
  author          = {Andr{\'e} Seznec and Jacques Lenfant},
  title           = {Interleaved parallel schemes: improving memory
                     throughput on supercomputers},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {246--255},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{vonEicken:1992:AMM,
  author          = {Thorsten von Eicken and David E. Culler and
                     Seth Copen Goldstein and Klaus Erik Schauser},
  title           = {Active messages: a mechanism for integrated
                     communication and computation},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {256--266},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Chien:1992:PAR,
  author          = {Andrew A. Chien and Jae H. Kim},
  title           = {Planar-adaptive routing: low-cost adaptive networks for
                     multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {268--277},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Glass:1992:TMA,
  author          = {Christopher J. Glass and Lionel M. Ni},
  title           = {The turn model for adaptive routing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {278--287},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Shimizu:1992:LLM,
  author          = {Toshiyuki Shimizu and Takeshi Horie and
                     Hiroaki Ishihata},
  title           = {Low-latency message communication support for the
                     {AP1000}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {288--297},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Aichinger:1992:FBP,
  author          = {Barbara P. Aichinger},
  title           = {{Futurebus+} as an {I/O} bus: profile {B}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {300--307},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Reddy:1992:SSO,
  author          = {A. L. Narasimha Reddy},
  title           = {A study of {I/O} system organizations},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {308--317},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Menon:1992:CSA,
  author          = {Jai Menon and Dick Mattson},
  title           = {Comparison of sparing alternatives for disk arrays},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {318--329},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Siegle:1992:MPB,
  author          = {Markus Siegle and Richard Hofmann},
  title           = {Monitoring program behaviour on {SUPRENUM}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {332--341},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Austin:1992:DDA,
  author          = {Todd M. Austin and Gurindar S. Sohi},
  title           = {Dynamic dependency analysis of ordinary programs},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {342--351},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Najjar:1992:ALL,
  author          = {Walid A. Najjar and W. Marcus Miller and
                     A. P. Wim B{\"o}hm},
  title           = {An analysis of loop latency in dataflow execution},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {352--360},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Yang:1992:NCD,
  author          = {Qing Yang and Liping Wu Yang},
  title           = {A novel cache design for vector processing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {362--371},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%% Last author's surname is "Navarro" (was misspelled "Navarra").
@Article{Valero:1992:INS,
author = "Mateo Valero and Tom{\'a}s Lang and Jos{\'e} M.
Llaber{\'\i}a and Montse Peiron and Eduard Ayguad{\'e}
and Juan J. Navarro",
title = "Increasing the number of strides for conflict-free
vector access",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "372--381",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Wulf:1992:EWA,
  author          = {Wm. A. Wulf},
  title           = {Evaluation of the {WM} architecture},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {382--390},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Johnson:1992:ICL,
  author          = {Kirk L. Johnson},
  title           = {The impact of communication locality on large-scale
                     multiprocessor performance},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {392--402},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Scott:1992:PSR,
  author          = {Steven L. Scott and James R. Goodman and
                     Mary K. Vernon},
  title           = {Performance of the {SCI} ring},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {403--414},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Talluri:1992:TST,
  author          = {Madhusudhan Talluri and Shing Kong and Mark D. Hill and
                     David A. Patterson},
  title           = {Tradeoffs in supporting two page sizes},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {415--424},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Louri:1992:PEO,
  author          = {Ahmed Louri and Jongwhoa Na},
  title           = {Parallel electro-optical rule-based system for fast
                     execution of expert systems (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {427--427},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Seznec:1992:OAF,
  author          = {Andr{\'e} Seznec and Karl Courtel},
  title           = {{OPAC} (abstract): a floating-point coprocessor
                     dedicated to compute-bound kernels},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {427--427},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Cheng:1992:TCB,
  author          = {Der-Chung Cheng and Kanad Ghose},
  title           = {The time-constrained barrier synchronizer and its
                     applications in parallel systems (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {428--428},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Louri:1992:NCD,
  author          = {Ahmed Louri and Hongki Sung},
  title           = {A new compiler-directed cache coherence scheme for
                     shared memory multiprocessors with fast and parallel
                     explicit invalidation (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {428--428},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Singh:1992:AGP,
  author          = {Gautam B. Singh},
  title           = {Architecture of a graphics processor (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {429--429},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Yomtov:1992:PED,
  author          = {Ruben Yomtov},
  title           = {Performance evaluation of disk subsystems},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {429--429},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Lai:1992:EBS,
  author          = {Feipei Lai and Meng-chou Chang},
  title           = {Enhancing boosting with semantic register in a
                     superscalar processor (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {430--430},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Sklenar:1992:PUVa,
  author          = {Ivan Sklenar},
  title           = {Prefetch unit for vector operations on scalar computers
                     (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {430--430},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Newman:1992:MMSa,
  author          = {Gary Newman},
  title           = {Memory management support for tiled array organization
                     (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {431--431},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Uht:1992:DPI,
  author          = {Augustus K. Uht and Darin B. Johnson},
  title           = {Data path issues in a highly concurrent machine
                     (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {431--431},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Fineberg:1992:SLT,
  author          = {Samuel A. Fineberg and Thomas L. Casavant and
                     Brent H. Pease},
  title           = {Seamless --- a latency-tolerant {RISC}-based
                     multiprocessor architecture (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {432--432},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@article{Sayeed:1992:PMB,
  author          = {M. A. Sayeed and M. Atiquzzaman},
  title           = {Performance of multiple-bus multiprocessor under
                     non-uniform memory reference model (abstract)},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {432--432},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%% Title: a space separates "model" from the "(abstract)" marker, as in the
%% other abstract entries of this issue.  Author: the hyphenated initials
%% carry a period after each letter ("J.-L.").
@Article{Kechadi:1992:PIV,
author = "M. Tahar Kechadi and J.-L. Dekeyser and Ph. Marquet and
Ph. Preux",
title = "Performance improvement for vector pipeline
multiprocessor systems using a disordered execution
model (abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "433--433",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Varma:1992:CPS,
  author          = {Anujan Varma and Gunjan Sinha},
  title           = {A class of prefetch schemes for on-chip data caches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  volume          = 20,
  number          = 2,
  pages           = {433--433},
  month           = may,
  year            = 1992,
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  journal-url     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:43 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Abnous:1992:PBV,
author = "Arthur Abnous and Nader Bagherzadeh",
title = "Pipelining and bypassing in a {VLIW} processor
(abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "434--434",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Prakash:1992:SAS,
author = "Shiv Prakash and Alice C. Parker",
title = "Synthesis of application-specific heterogeneous
multiprocessor systems (abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "434--434",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farrens:1992:PTL,
author = "Matthew Farrens and Arvin Park and Rob Fanfelle and
Pius Ng and Gary Tyson",
title = "A partitioned translation lookaside buffer approach to
reducing address bandwidth (abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "435--435",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Laudon:1992:AIT,
author = "James Laudon and Anoop Gupta and Mark Horowitz",
title = "Architectural and implementation tradeoffs in the
design of multiple-context processors (abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "435--435",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Alleyne:1992:EDN,
author = "Brian D. Alleyne and Isaac D. Scherson",
title = "Expanded delta networks for very large parallel
computers",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "436--436",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singh:1992:IHB,
author = "Jaswinder Pal Singh",
title = "Implications of hierarchical {N-body} methods for
multiprocessor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "436--436",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Michael:1992:DBC,
author = "Wisam Michael",
title = "Directory-based cache coherency protocol for a
ring-connected multiprocessor-array",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "437--437",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:1992:RCD,
author = "Wen-Hann Wang and Jim Quinlan and Konrad Lai",
title = "Revisit the case for direct-mapped caches: a case for
two-way set-associative level-two caches",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "437--437",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Culler:1992:AMM,
author = "David E. Culler and Michial Gunter and James C. Lee",
title = "Analysis of multithreaded microprocessors under
multiprogramming",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "438--438",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wittenbrink:1992:CWG,
author = "C. M. Wittenbrink and A. K. Somani and C. H. Chen",
title = "Cache write generate for high performance parallel
processing",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "438--438",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burkhardt:1992:ICA,
author = "Walter H. Burkhardt and Stefan Rust",
title = "Integrated computer architecture development system",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "2",
pages = "439--439",
month = may,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:43 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chevance:1992:EMM,
author = "R. J. Chevance",
title = "An evaluation methodology for microprocessor and
system architecture",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "4--13",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Laird:1992:CTC,
author = "Michael Laird",
title = "A comparison of three current superscalar designs",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "14--21",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dongarra:1992:PVC,
author = "Jack J. Dongarra",
title = "Performance of various computers using standard linear
equations software",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "22--44",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Keown:1992:PHR,
author = "William F. {Keown, Jr.} and Philip {Koopman, Jr.} and
Aaron Collins",
title = "Performance of the {HARRIS RTX 2000} stack
architecture versus the {Sun 4 SPARC} and the {Sun 3
M68020} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "45--52",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1992:UNa,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "56--62",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chalterjee:1992:BRI,
author = "Siddhartha Chatterjee",
title = "Book review: {{\em The Impact of Vector and Parallel
Architectures on the Gaussian Elimination Algorithm\/}}
by {Yves Robert (Manchester University Press and
Halsted Press, 1991)}",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "3",
pages = "63--64",
month = jun,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:56 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Esponda:1992:GCR,
author = "Margarita Esponda and Ra{\'u}l Rojas",
title = "A graphical comparison of {RISC} processors",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "2--8",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matsui:1992:DRM,
author = "Shogo Matsui",
title = "Dynamic refresh method for dynamic {RAMs}",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "9--16",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Park:1992:CRS,
author = "Arvin Park and Ron Maeder",
title = "Codes to reduce switching transients across {VLSI I/O}
pins",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "17--21",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Newman:1992:MMSb,
author = "Gary Newman",
title = "Memory management support for tiled array
organization",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "22--30",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sklenar:1992:PUVb,
author = "Ivan Sklen{\'a}{\v{r}}",
title = "Prefetch unit for vector operations on scalar
computers",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "31--37",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malik:1992:ILP,
author = "Nadeem Malik and Richard J. Eickemeyer and Stamatis
Vassiliadis",
title = "Instruction-level parallelism from execution interlock
collapsing",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "38--43",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vassiliadis:1992:ASO,
author = "Stamatis Vassiliadis and Bart Blaner and Richard J.
Eickemeyer",
title = "On the attributes of the {SCISM} organization",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "44--53",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1992:UNb,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "56--64",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Allen:1992:BRC,
author = "Ken Allen",
title = "Book review: {{\em Computing with Parallel
Architectures: T.Node\/}}, edited by {D. Gassilloud and
J. C. Grossetie (Kluwer Academic Publishers 1991)}",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "4",
pages = "65--66",
month = sep,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Michael:1992:FMB,
author = "Gavin Michael and Andrew Chien",
title = "Future multicomputers: beyond minimalist
multiprocessors?",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "5",
pages = "6--12",
month = dec,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kaushal:1992:CHH,
author = "R. P. Kaushal and J. S. Bedi",
title = "Comparison of hypercube, hypernet, and symmetric
hypernet architectures",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "5",
pages = "13--25",
month = dec,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1992:UNc,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "5",
pages = "28--33",
month = dec,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Levy:1992:BRN,
author = "David Levy",
title = "Book review: {{\em Neural Networks and Fuzzy Systems:
A Dynamical Systems Approach to Machine
Intelligence\/}} by {Bart Kosko (Prentice Hall 1992)}",
journal = j-COMP-ARCH-NEWS,
volume = "20",
number = "5",
pages = "34--34",
month = dec,
year = "1992",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:19 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Inoue:1993:PEV,
author = "Atsushi Inoue and Kenji Takeda",
title = "Performance evaluation for various configuration of
superscalar processors",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "4--11",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Uht:1993:EMIa,
author = "Augustus K. Uht",
title = "Extraction of massive instruction level parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "12--14",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ullah:1993:MIP,
author = "Nasr Ullah and Matt Holle",
title = "The {MC88110} implementation of precise exceptions in
a superscalar architecture",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "15--25",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Deville:1993:PDP,
author = "Yannick Deville",
title = "A process-dependent partitioning strategy for cache
memories",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "26--33",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1993:UNa,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "36--38",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Staff:1993:BR,
author = "{ACM SIGARCH Computer Architecture News Staff}",
title = "Book reviews",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "1",
pages = "39--39",
month = mar,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:33 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cypher:1993:ARP,
author = "R. Cypher and A. Ho and S. Konstantinidou and P.
Messina",
title = "Architectural requirements of parallel scientific
applications with explicit communication",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "2--13",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rothberg:1993:WSC,
author = "Edward Rothberg and Jaswinder Pal Singh and Anoop
Gupta",
title = "Working sets, cache sizes, and node granularity issues
for large-scale multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "14--26",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nagle:1993:DTS,
author = "David Nagle and Richard Uhlig and Tim Stanley and
Stuart Sechrest and Trevor Mudge and Richard Brown",
title = "Design tradeoffs for software-managed {TLBs}",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "27--38",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Huck:1993:AST,
author = "Jerry Huck and Jim Hays",
title = "Architectural support for translation table management
in large address space machines",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "39--50",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cao:1993:TPR,
author = "Pei Cao and Swee Boon Lim and Shivakumar Venkataraman
and John Wilkes",
title = "The {TickerTAIP} parallel {RAID} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "52--63",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stodolsky:1993:PLO,
author = "Daniel Stodolsky and Garth Gibson and Mark Holland",
title = "Parity logging: overcoming the small write problem in
redundant disk arrays",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "64--75",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Menon:1993:AFT,
author = "Jai Menon and Jim Cortney",
title = "The architecture of a fault-tolerant cached {RAID}
controller",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "76--87",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dubois:1993:DEU,
author = "Michel Dubois and Jonas Skeppstedt and Livio Ricciulli
and Krishnan Ramamurthy and Per Stenstr{\"o}m",
title = "The detection and elimination of useless misses in
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "88--97",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cox:1993:ACC,
author = "Alan L. Cox and Robert J. Fowler",
title = "Adaptive cache coherency for detecting migratory
shared data",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "98--108",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Stenstrom:1993:ACC,
author = "Per Stenstr{\"o}m and Mats Brorsson and Lars
Sandberg",
title = "An adaptive cache coherence protocol optimized for
migratory sharing",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "109--118",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Waldspurger:1993:RRF,
author = "Carl A. Waldspurger and William E. Weihl",
title = "Register relocation: flexible contexts for
multithreading",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "120--130",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hidaka:1993:MTC,
author = "Yasuo Hidaka and Hanpei Koike and Hidehiko Tanaka",
title = "Multiple threads in cyclic register windows",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "131--142",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dwarkadas:1993:ERC,
author = "Sandhya Dwarkadas and Peter Keleher and Alan L. Cox
and Willy Zwaenepoel",
title = "Evaluation of release consistent software distributed
shared memory on emerging network technology",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "144--155",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wood:1993:MCS,
author = "David A. Wood and Satish Chandra and Babak Falsafi and
Mark D. Hill and James R. Larus and Alvin R. Lebeck and
James C. Lewis and Shubhendu S. Mukherjee and Subbarao
Palacharla and Steven K. Reinhardt",
title = "Mechanisms for cooperative shared memory",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "156--167",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seznec:1993:CTW,
author = "Andr{\'e} Seznec",
title = "A case for two-way skewed-associative caches",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "169--178",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:1993:CAC,
author = "Anant Agarwal and Stephen D. Pudar",
title = "Column-associative caches: a technique for reducing
the miss rate of direct-mapped caches",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "179--190",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:1993:CWP,
author = "Norman P. Jouppi",
title = "Cache write policies and performance",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "191--201",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Boyd:1993:HPM,
author = "Eric L. Boyd and Edward S. Davidson",
title = "Hierarchical performance modeling with {MACS}: a case
study of the {Convex C-240}",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "203--210",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kuck:1993:CSI,
author = "D. Kuck and E. Davidson and D. Lawrie and A. Sameh and
C. Q. Zhu and A. Veidenbaum and J. Konicek and P. Yew
and K. Gallivan and W. Jalby and H. Wijshoff and R.
Bramley and U. M. Yang and P. Emrath and D. Padua and
R. Eigenmann and J. Hoeflinger and G. Jaxon and Z. Li
and T. Murphy and J. Andrews",
title = "The {Cedar} system and an initial performance study",
journal = j-COMP-ARCH-NEWS,
volume = "21",
number = "2",
pages = "213--223",
month = may,
year = "1993",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 224--235:
% Noakes, Wallach and Dally, architectural evaluation of the J-machine.
@article{Noakes:1993:JMM,
  author = {Michael D. Noakes and Deborah A. Wallach and
    William J. Dally},
  title = {The {J-machine} multicomputer: an architectural evaluation},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {224--235},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 237--246:
% Bunda, Fussell, Athas and Jenevein, 16-bit vs. 32-bit instructions.
@article{Bunda:1993:BVB,
  author = {John Bunda and Don Fussell and W. C. Athas and
    Roy Jenevein},
  title = {16-bit vs. 32-bit instructions for pipelined microprocessors},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {237--246},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 247--256:
% Kiyohara et al., register connection for instruction set architectures.
@article{Kiyohara:1993:RCN,
  author = {Tokuzo Kiyohara and Scott Mahlke and William Chen
    and Roger Bringmann and Richard Hank and Sadun Anik
    and Wen-Mei Hwu},
  title = {Register connection: a new approach to adding registers
    into instruction set architectures},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {247--256},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 257--266:
% Yeh and Patt, comparison of two-level dynamic branch predictors.
@article{Yeh:1993:CDB,
  author = {Tse-Yu Yeh and Yale N. Patt},
  title = {A comparison of dynamic branch predictors that use
    two levels of branch history},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {257--266},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 268--277:
% Barroso and Dubois, cache-coherent ring-based multiprocessors.
@article{Barroso:1993:PCC,
  author = {Luis Andr{\'e} Barroso and Michel Dubois},
  title = {The performance of cache-coherent ring-based multiprocessors},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {268--277},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 278--288:
% Tullsen and Eggers, limits of cache prefetching on a bus-based multiprocessor.
@article{Tullsen:1993:LCP,
  author = {Dean M. Tullsen and Susan J. Eggers},
  title = {Limitations of cache prefetching on a bus-based multiprocessor},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {278--288},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 289--300:
% Herlihy and Moss, transactional memory.
@article{Herlihy:1993:TMA,
  author = {Maurice Herlihy and J. Eliot B. Moss},
  title = {Transactional memory: architectural support for lock-free
    data structures},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {289--300},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 302--313:
% Spertus et al., fine-grained parallel mechanisms on the J-machine and CM-5.
@article{Spertus:1993:EMF,
  author = {Ellen Spertus and Seth Copen Goldstein and
    Klaus Erik Schauser and Thorsten von Eicken and
    David E. Culler and William J. Dally},
  title = {Evaluation of mechanisms for fine-grained parallel programs
    in the {J-machine} and the {CM-5}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {302--313},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 314--325:
% Horie et al., AP1000 performance with message communication.
@article{Horie:1993:IAP,
  author = {Takeshi Horie and Kenichi Hayashi and
    Toshiyuki Shimizu and Hiroaki Ishihata},
  title = {Improving {AP1000} parallel computer performance
    with message communication},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {314--325},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 327--336:
% Hsu and Smith, cached DRAM organizations in vector supercomputers.
@article{Hsu:1993:PCD,
  author = {W.-C. Hsu and J. E. Smith},
  title = {Performance of cached {DRAM} organizations in
    vector supercomputers},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {327--336},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 337--340:
% Gao, the Chinese remainder theorem and the prime memory system.
@article{Gao:1993:CRT,
  author = {Q. S. Gao},
  title = {The {Chinese} remainder theorem and the prime memory system},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {337--340},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 341--350:
% Seznec and Lenfant, odd memory systems.
@article{Seznec:1993:OMS,
  author = {Andr{\'e} Seznec and Jacques Lenfant},
  title = {Odd memory systems may be quite interesting},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {341--350},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(2), May 1993, pp. 351--360:
% Boppana and Chalasani, comparison of adaptive wormhole routing algorithms.
@article{Boppana:1993:CAW,
  author = {Rajendra V. Boppana and Suresh Chalasani},
  title = {A comparison of adaptive wormhole routing algorithms},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {2},
  pages = {351--360},
  month = may,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:46 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 5--12:
% Uht, extraction of massive instruction level parallelism.
@article{Uht:1993:EMIb,
  author = {Augustus K. Uht},
  title = {Extraction of massive instruction level parallelism},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {5--12},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 13--33:
% Ramanathan and Oren, survey of commercial parallel machines.
@article{Ramanathan:1993:SCP,
  author = {Gowri Ramanathan and Joel Oren},
  title = {Survey of commercial parallel machines},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {13--33},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 34--37:
% Ewy and Evans, secondary cache performance in RISC architecture.
@article{Ewy:1993:SCP,
  author = {Benjamin J. Ewy and Joseph B. Evans},
  title = {Secondary cache performance in {RISC} architecture},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {34--37},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 40--45:
% Danesh, physical limitations of a computer.
@article{Danesh:1993:PLC,
  author = {Iraj Danesh},
  title = {Physical limitations of a computer},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {40--45},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 46--49:
% Thorson, "Usenet Nuggets" column.
% Title capitalization matches the other instalments of this column in the
% file (Thorson:1993:UNc, Thorson:1993:UNd, Thorson:1994:UNa).
@Article{Thorson:1993:UNb,
  author = "Mark Thorson",
  title = "{Usenet} Nuggets",
  journal = j-COMP-ARCH-NEWS,
  volume = "21",
  number = "3",
  pages = "46--49",
  month = jun,
  year = "1993",
  CODEN = "CANED2",
  ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L = "0163-5964",
  bibdate = "Fri May 12 09:40:56 MDT 2006",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 50--51:
% Fostel, book review of "Principles of Computer Systems" (Karam and Bryant).
@article{Fostel:1993:BRP,
  author = {Gary Fostel},
  title = {Book Reviews: {{\em Principles of Computer Systems\/}} by
    {Gerald M. Karam \& John C. Bryant (Prentice Hall 1992)}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {50--51},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 51--53:
% Fostel, book review of "Computer Architecture" (De Blasi).
@article{Fostel:1993:BRC,
  author = {Gary Fostel},
  title = {Book Review: {{\em Computer Architecture\/}} by
    {Mario De Blasi (Addison-Wesley Publishing Company, 1990)}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {51--53},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(3), June 1993, pp. 53--54:
% Fulcher, book review of "Practical Parallel Computing" (Messina and Murli, eds.).
@article{Fulcher:1993:BRP,
  author = {John Fulcher},
  title = {Book Review: {{\em Practical Parallel Computing\/}} by
    {Paul Messina and Almerico Murli, Editors (John Wiley and
    Sons, 1992)}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {3},
  pages = {53--54},
  month = jun,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:56 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(4), September 1993, pp. 8--10:
% Hill et al., Wisconsin Architectural Research Tool Set.
@article{Hill:1993:WAR,
  author = {Mark D. Hill and James R. Larus and Alvin R. Lebeck and
    Madhusudhan Talluri and David A. Wood},
  title = {{Wisconsin Architectural Research Tool Set}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {4},
  pages = {8--10},
  month = sep,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:12 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(4), September 1993, pp. 11--19:
% Hyatt, a high-performance object-oriented memory.
@article{Hyatt:1993:HPO,
  author = {Craig Hyatt},
  title = {A high-performance object-oriented memory},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {4},
  pages = {11--19},
  month = sep,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:12 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(4), September 1993, pp. 20--26:
% Dewan and Nair, a case for uniform memory access multiprocessors.
@article{Dewan:1993:CUM,
  author = {Gautam Dewan and V. S. S. Nair},
  title = {A case for uniform memory access multiprocessors},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {4},
  pages = {20--26},
  month = sep,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:12 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(4), September 1993, pp. 27--28:
% Thorson, "Usenet Nuggets" column.
@article{Thorson:1993:UNc,
  author = {Mark Thorson},
  title = {{Usenet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {4},
  pages = {27--28},
  month = sep,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:12 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(4), September 1993, p. 29:
% Langdon, book reviews.
% The item occupies a single page, so "pages" holds one number rather than a
% degenerate range, which styles would otherwise print as "pages 29--29".
@Article{Langdon:1993:BR,
  author = "Glen Langdon",
  title = "Book Reviews",
  journal = j-COMP-ARCH-NEWS,
  volume = "21",
  number = "4",
  pages = "29",
  month = sep,
  year = "1993",
  CODEN = "CANED2",
  ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L = "0163-5964",
  bibdate = "Fri May 12 09:41:12 MDT 2006",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 5--6:
% Jain, Werth and Browne, introduction to the special issue on parallel I/O.
@article{Jain:1993:ISI,
  author = {Ravi Jain and John Werth and J. C. Browne},
  title = {Introduction to the {Special Issue on Input\slash Output
    in Parallel Computer Systems}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {5--6},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 7--14:
% Corbett, Baylor and Feitelson, overview of the Vesta parallel file system.
@article{Corbett:1993:OVP,
  author = {Peter F. Corbett and Sandra Johnson Baylor and
    Dror G. Feitelson},
  title = {Overview of the {Vesta} parallel file system},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {7--14},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 15--22:
% Lin and Zhou, parallelizing I/O intensive applications on a workstation cluster.
@article{Lin:1993:PIA,
  author = {Z. Lin and S. Zhou},
  title = {Parallelizing {I/O} intensive applications for a workstation
    cluster: a case study},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {15--22},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 23--30:
% Fineberg, implementing the NHT-1 application I/O benchmark.
@article{Fineberg:1993:INA,
  author = {Samuel A. Fineberg},
  title = {Implementing the {NHT-1} application {I/O} benchmark},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {23--30},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 31--38:
% del Rosario, Bordawekar and Choudhary, two-phase run-time parallel I/O.
@article{delRosario:1993:IPT,
  author = {Juan Miguel del Rosario and Rajesh Bordawekar and
    Alok Choudhary},
  title = {Improved parallel {I/O} via a two-phase run-time access
    strategy},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {31--38},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 39--46:
% Ghandeharizadeh, Shahabi and Ramos, continuous retrieval of multimedia objects.
@article{Ghandeharizadeh:1993:OTS,
  author = {Shahram Ghandeharizadeh and Cyrus Shahabi and
    Luis Ramos},
  title = {An overview of techniques to support continuous retrieval
    of multimedia objects},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {39--46},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 47--54:
% Jain, Somalwar, Werth and Browne, scheduling parallel I/O operations.
@article{Jain:1993:SPO,
  author = {Ravi Jain and Kiran Somalwar and John Werth and
    J. C. Browne},
  title = {Scheduling parallel {I/O} operations},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {47--54},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 55--62:
% Li and Rishe, T9000-based architecture for parallel database machines.
@article{Li:1993:TTF,
  author = {Qiang Li and Naphtali Rishe},
  title = {A transputer {T9000} family based architecture for parallel
    database machines},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {55--62},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 63--70:
% Assmann, a RISC processor architecture with a versatile stack system.
@article{Assmann:1993:RPA,
  author = {Claus A{\ss}mann},
  title = {A {RISC} processor architecture with a versatile
    stack system},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {63--70},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 71--78:
% Wang, note on diagnosabilities of hypercubes under one-step diagnosis.
@article{Wang:1993:NDH,
  author = {Dajin Wang},
  title = {A note on {``Diagnosabilities of hypercubes under
    the pessimistic one-step diagnosis strategy''}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {71--78},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 79--85:
% Thorson, "Usenet Nuggets" column.
@article{Thorson:1993:UNd,
  author = {Mark Thorson},
  title = {{Usenet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {79--85},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 21(5), December 1993, pp. 85--86:
% Alverson, book review of "High-Speed Digital Design" (Johnson and Graham).
@article{Alverson:1993:BRH,
  author = {Bob Alverson},
  title = {Book Review: {{\em High-Speed Digital Design:
    A Handbook of Black Magic\/}} by {Howard W. Johnson
    and Martin Graham (Prentice-Hall, 1993)}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21},
  number = {5},
  pages = {85--86},
  month = dec,
  year = {1993},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:19 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 3--18:
% Iannucci et al., multithreading panel session I (architecture/implementation).
@article{Iannucci:1994:AII,
  author = {Robert Iannucci and Anant Agarwal and Bill Dally and
    Anoop Gupta and Greg Papadopoulos and Burton Smith},
  title = {Architectural and implementation issues for multithreading
    (panel session {I})},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {3--18},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 19--33:
% Halstead et al., multithreading panel session II (programming/compilation).
@article{Halstead:1994:PCR,
  author = {Burt Halstead and David Callahan and Jack Dennis and
    R. S. Nikhil and Vivek Sarkar},
  title = {Programming, compilation, and resource management issues
    for multithreading (panel session {II})},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {19--33},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 34--43:
% Baker, linear logic and permutation stacks.
@article{Baker:1994:LLP,
  author = {Henry G. Baker},
  title = {Linear logic and permutation stacks---the {Forth} shall
    be first},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {34--43},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 44--51:
% compile time instruction cache optimizations.
% NOTE(review): the first author's surname "Mendlson" may be a misspelling of
% "Mendelson" -- verify against the printed article before changing; the
% citation key must stay as is so existing citations keep resolving.
@article{Mendlson:1994:CTI,
  author = {Abraham Mendlson and Shlomit S. Pinter and
    Ruth Shtokhamer},
  title = {Compile time instruction cache optimizations},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {44--51},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 52--58:
% Barach et al., HALSIM, a SPARC V9 behavioral model.
@article{Barach:1994:HVF,
  author = {David Barach and Jaspal Kohli and John Slice and
    Marc Spaulding and Rajeev Bharadhwaj and Don Hudson and
    Cliff Neighbors and Nirmal Saxena and Rolland Crunk},
  title = {{HALSIM}---a very fast {SPARC V9} behavioral model},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {52--58},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 59--60:
% Thorson, "Usenet Nuggets" column.
@article{Thorson:1994:UNa,
  author = {Mark Thorson},
  title = {{Usenet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {59--60},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(1), March 1994, pp. 60--61:
% Madruga, book review of "SNMP, SNMPv2, and CMIP" (Stallings).
@article{Madruga:1994:BRS,
  author = {Ewerton Longoni Madruga},
  title = {Book Review: {{\em SNMP, SNMPv2, and CMIP:
    The Practical Guide to Network Management Standards\/}} by
    {William Stallings (Addison-Wesley Publishing Company Inc.
    1993)}},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {1},
  pages = {60--61},
  month = mar,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:34 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 2--11:
% Calder and Grunwald, instruction fetch and branch prediction.
@article{Calder:1994:FAI,
  author = {B. Calder and D. Grunwald},
  title = {Fast and accurate instruction fetch and branch prediction},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {2--11},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 12--21:
% Talcott et al., unresolved branches and branch prediction performance.
@article{Talcott:1994:IUB,
  author = {A. R. Talcott and W. Yamamoto and M. J. Serrano and
    R. C. Wood and M. Nemirovsky},
  title = {The impact of unresolved branches on branch prediction
    scheme performance},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {12--21},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 24--33:
% Palacharla and Kessler, stream buffers as a secondary cache replacement.
@article{Palacharla:1994:ESB,
  author = {S. Palacharla and R. E. Kessler},
  title = {Evaluating stream buffers as a secondary cache replacement},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {24--33},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 34--45:
% Jouppi and Wilton, tradeoffs in two-level on-chip caching.
@article{Jouppi:1994:TTL,
  author = {N. P. Jouppi and S. J. E. Wilton},
  title = {Tradeoffs in two-level on-chip caching},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {34--45},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 48--59:
% Singhal and Goldberg, performance tuning support on the SPARCcenter 2000.
@article{Singhal:1994:ASP,
  author = {A. Singhal and A. J. Goldberg},
  title = {Architectural support for performance tuning:
    a case study on the {SPARCcenter 2000}},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {48--59},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 60--70:
% Cvetanovic and Bhandarkar, Alpha AXP performance on TP and SPEC workloads.
@article{Cvetanovic:1994:CAA,
  author = {Z. Cvetanovic and D. Bhandarkar},
  title = {Characterization of {Alpha AXP} performance using
    {TP} and {SPEC} workloads},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {60--70},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 71--80:
% Natarajan, Sharma and Iyer, measurement-based contention/overhead study.
@article{Natarajan:1994:MBC,
  author = {C. Natarajan and S. Sharma and R. K. Iyer},
  title = {Measurement-based characterization of global memory and
    network contention, operating system and parallelization
    overheads},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {71--80},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 82--93:
% Joe and Hennessy, memory overhead required for COMA architectures.
@article{Joe:1994:EMO,
  author = {T. Joe and J. L. Hennessy},
  title = {Evaluating the memory overhead required for
    {COMA} architectures},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {82--93},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 94--105:
% Klaiber and Levy, message passing vs. shared memory for data parallel programs.
@article{Klaiber:1994:CMP,
  author = {A. C. Klaiber and H. M. Levy},
  title = {A comparison of message passing and shared memory
    architectures for data parallel programs},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {94--105},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
% SIGARCH Computer Architecture News 22(2), April 1994, pp. 106--117:
% Cox et al., software versus hardware shared-memory implementation.
@article{Cox:1994:SVH,
  author = {A. L. Cox and S. Dwarkadas and P. Keleher and
    H. Lu and R. Rajamony and W. Zwaenepoel},
  title = {Software versus hardware shared-memory implementation:
    a case study},
  journal = j-COMP-ARCH-NEWS,
  volume = {22},
  number = {2},
  pages = {106--117},
  month = apr,
  year = {1994},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:40 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Pnevmatikatos:1994:GEB,
author = "D. N. Pnevmatikatos and G. S. Sohi",
title = "Guarded execution and branch prediction in dynamic
{ILP} processors",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "120--129",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Su:1994:BMS,
author = "C.-L. Su and A. M. Despain",
title = "Branch with masked squashing in superpipelined
processors",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "130--140",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Blumrich:1994:VMM,
author = "M. A. Blumrich and K. Li and R. Alpert and C. Dubnicki
and E. W. Felten and J. Sandberg",
title = "Virtual memory mapped network interface for the
{SHRIMP} multicomputer",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "142--153",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steenkiste:1994:AEH,
author = "P. Steenkiste and M. Hemy and T. Mummert and B. Zill",
title = "Architecture and evaluation of a high-speed networking
subsystem for distributed-memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "154--163",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nayfeh:1994:EDS,
author = "B. A. Nayfeh and K. Olukotun",
title = "Exploring the design space for a shared-cache
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "166--175",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thekkath:1994:ISB,
author = "R. Thekkath and S. J. Eggers",
title = "Impact of sharing-based thread placement on
multithreaded architectures",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "176--186",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dahlgren:1994:CPG,
author = "F. Dahlgren and M. Dubois and P. Stenstr{\"o}m",
title = "Combined performance gains of simple cache protocol
extensions",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "187--197",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Huang:1994:SDC,
author = "A. S. Huang and G. Slavenburg and J. P. Shen",
title = "Speculative disambiguation: a compilation technique
for dynamic memory disambiguation",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "200--210",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farkas:1994:CPT,
author = "K. I. Farkas and N. P. Jouppi",
title = "Complexity\slash performance tradeoffs with
non-blocking loads",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "211--222",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1994:PSS,
author = "T.-F. Chen and J.-L. Baer",
title = "A performance study of software and hardware data
prefetching schemes",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "223--232",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Drapeau:1994:RIH,
author = "A. L. Drapeau and K. W. Shirriff and J. H. Hartman and
E. L. Miller and S. Seshan and R. H. Katz and K. Lutz
and D. A. Patterson and E. K. Lee and P. M. Chen and G.
A. Gibson",
title = "{RAID-II}: a high-bandwidth network file server",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "234--244",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Blaum:1994:EOS,
author = "M. Blaum and J. Brady and J. Bruck and J. Menon",
title = "{EVENODD}: an optimal scheme for tolerating double
disk failures in {RAID} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "245--254",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ng:1994:CDA,
author = "S. W. Ng",
title = "Crosshatch disk array for improved reliability and
performance",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "255--264",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{DeHon:1994:MRA,
author = "A. DeHon and F. Chong and M. Becker and E. Egozy and
H. Minsky and S. Peretz and T. F. {Knight, Jr.}",
title = "{METRO}: a router architecture for high-performance,
short-haul routing networks",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "266--277",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Allen:1994:AAR,
author = "J. D. Allen and P. T. Gaughan and D. E. Schimmel and
S. Yalamanchili",
title = "{Ariadne}---an adaptive router for fault-tolerant
multicomputers",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "278--288",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:1994:CRF,
author = "J. H. Kim and Z. Liu and A. A. Chien",
title = "Compressionless routing: a framework for adaptive and
fault-tolerant routing",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "289--300",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kuskin:1994:SFM,
author = "J. Kuskin and D. Ofelt and M. Heinrich and J. Heinlein
and R. Simoni and K. Gharachorloo and J. Chapin and D.
Nakahira and J. Baxter and M. Horowitz and A. Gupta and
M. Rosenblum and J. Hennessy",
title = "The {Stanford FLASH} multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "302--313",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chaiken:1994:SEC,
author = "D. Chaiken and A. Agarwal",
title = "Software-extended coherent shared memory: performance
and cost",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "314--324",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reinhardt:1994:TTU,
author = "S. K. Reinhardt and J. R. Larus and D. A. Wood",
title = "{Tempest} and {Typhoon}: user-level shared memory",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "325--336",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farrens:1994:SSC,
author = "M. Farrens and G. Tyson and A. R. Pleszkun",
title = "A study of single-chip processor\slash cache
organizations for large numbers of transistors",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "338--347",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:1994:UAT,
author = "C.-H. Chen and A. K. Somani",
title = "A unified architectural tradeoff methodology",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "348--357",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nagle:1994:OAC,
author = "D. Nagle and R. Uhlig and T. Mudge and S. Sechrest",
title = "Optimal allocation of on-chip memory for
multiple-{API} operating systems",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "358--369",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Quong:1994:ECM,
author = "R. W. Quong",
title = "Expected {I-cache} miss rates via the gap model",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "372--383",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seznec:1994:DSC,
author = "A. Seznec",
title = "Decoupled sectored caches: conciliating low tag
implementation cost and low miss ratio",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "2",
pages = "384--393",
month = apr,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:40 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gurd:1994:SBB,
author = "J. R. Gurd",
title = "Supercomputing: big bang or steady state growth?",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "3",
pages = "3--13",
month = jun,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Litchfield:1994:IES,
author = "Kay P. Litchfield",
title = "Instruction execution sequence confirmation",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "3",
pages = "14--18",
month = jun,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Allen:1994:RWR,
author = "Phil Allen and Franc Brglez and Hal Carter and Robert
Caverly and Jerry Dillion and Albert Lo and Ron Lomax
and John Oldfield and Cesar Pina and T. J. Wilkinson",
title = "Report of the {1993 Workshop on Rapid Prototyping of
Microelectronic Systems for Universities}",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "3",
pages = "19--26",
month = jun,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1994:UNb,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "3",
pages = "27--28",
month = jun,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Madruga:1994:BRI,
author = "Ewerton Longoni Madruga",
title = "Book Review: {{\em Internetworking with TCP/IP, vol.
III: Client-Server programming and applications (BSD
Sockets version)\/}} by {Douglas E. Comer and David L.
Stevens (Prentice-Hall, 1993)}",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "3",
pages = "29--30",
month = jun,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:57 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jain:1994:SII,
author = "Ravi Jain and John Werth and J. C. Browne",
title = "{Special Issue on Input\slash Output in Parallel
Computer Systems}: {Introduction}",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "3--4",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Baylor:1994:PEM,
author = "Sandra Johnson Baylor and Caroline Benveniste and
Yarsun Hsu",
title = "Performance evaluation of a massively parallel {I/O}
subsystem",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "5--10",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sinclair:1994:IPS,
author = "James B. Sinclair and Jay Tang and Peter J. Varman",
title = "Instability in parallel {I/O} systems",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "11--16",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vanderleest:1994:MBC,
author = "Steven H. VanderLeest and Ravishankar K. Iyer",
title = "Measurement of {I/O} bus contention and correlation
among heterogeneous device types in a single-bus
multiprocessor system",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "17--22",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thakur:1994:CCD,
author = "Rajeev Thakur and Rajesh Bordawekar and Alok
Choudhary",
title = "Compilation of out-of-core data parallel programs for
distributed memory machines",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "23--28",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Asthana:1994:EAM,
author = "Abhaya Asthana and Mark Cravatts and Paul
Krzyzanowski",
title = "An experimental active memory based {I/O} subsystem",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "29--34",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Durand:1994:DSA,
author = "Dannie Durand and Ravi Jain and David Tseytlin",
title = "Distributed scheduling algorithms to improve the
performance of parallel data transfers",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "35--40",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yokota:1994:DND,
author = "Haruo Yokota",
title = "{DR-nets}: data-reconstruction networks for highly
reliable parallel-disk systems",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "41--46",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Forsell:1994:MMPa,
author = "Martti J. Forsell",
title = "Are multiport memories physically feasible?",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "47--54",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chaudhry:1994:CMP,
author = "Ghulam Chaudhry and Xuechang Li",
title = "A case for the multithreaded processor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "55--59",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chan:1994:ECF,
author = "Yin Chan and Ashok Sudarsanam and Andrew Wolfe",
title = "The effect of compiler-flag tuning on {SPEC} benchmark
performance",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "60--70",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1994:RCC,
author = "Jin-Ho Lee and Min-Young Lee and Seong-Uk Choi and
Myong-Soon Park",
title = "Reducing cache conflicts in data cache prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "71--77",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1994:UNc,
author = "Mark Thorson",
title = "{Usenet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "4",
pages = "78--81",
month = sep,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:12 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Forsell:1994:MMPb,
author = "Martti J. Forsell",
title = "Are multiport memories physically feasible?",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "3--10",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sosic:1994:HCH,
author = "Rok Sosi{\v{c}}",
title = "History cache: hardware support for reverse
execution",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "11--18",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hill:1994:WWT,
author = "Mark D. Hill and James R. Larus and David A. Wood",
title = "The {Wisconsin Wind Tunnel} project: an annotated
bibliography",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "19--26",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Saha:1994:DDT,
author = "Avijit Saha and Nadeem Malik",
title = "Distributed directory tags",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "27--29",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Unwala:1994:SMP,
author = "Ishaq H. Unwala and Harvey G. Cragon",
title = "A study of {MIPS} programs",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "30--40",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1994:IN,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "41--46",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ohnemus:1994:BIL,
author = "Kenneth R. Ohnemus and Diana F. Mallin",
title = "Benefits of implementing on-line methods and
procedures",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "49--55",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cunningham:1994:LDT,
author = "Daniel K. Cunningham and Steven J. Reilly",
title = "Leading the design team---the evolution of the
technical writer from a support role to a design role",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "56--60",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rockley:1994:MTE,
author = "Ann Rockley",
title = "Multimedia: towards an electronic performance support
system",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "61--65",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Drew:1994:TTM,
author = "Katherine E. Drew",
title = "Telecommunicators and telecommuters: making
multiple-site documentation projects work",
journal = j-COMP-ARCH-NEWS,
volume = "22",
number = "5",
pages = "66--75",
month = dec,
year = "1994",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:20 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Severson:1995:TCP,
author = "Aimee Severson and Brent Nelson",
title = "Throughput in a counterflow pipeline processor",
journal = j-COMP-ARCH-NEWS,
volume = "23",
number = "1",
pages = "5--12",
month = mar,
year = "1995",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:34 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hsu:1995:SAC,
author = "Tsong-Chih Hsu and Sheng-De Wang",
title = "A simple architecture for constant time sorting
machines",
journal = j-COMP-ARCH-NEWS,
volume = "23",
number = "1",
pages = "13--19",
month = mar,
year = "1995",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:34 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wulf:1995:HMW,
author = "Wm. A. Wulf and Sally A. McKee",
title = "Hitting the memory wall: implications of the obvious",
journal = j-COMP-ARCH-NEWS,
volume = "23",
number = "1",
pages = "20--24",
month = mar,
year = "1995",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:34 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1995:INa,
  author =       "Mark Thorson",
  title =        "{Internet} Nuggets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "1",
  pages =        "25--28",
  month =        mar,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:34 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:1995:AMA,
  author =       "Anant Agarwal and Ricardo Bianchini and David Chaiken
                 and Kirk L. Johnson and David Kranz and John
                 Kubiatowicz and Beng-Hong Lim and Kenneth Mackenzie and
                 Donald Yeung",
  title =        "The {MIT Alewife} machine: architecture and
                 performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "2--13",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kodama:1995:EXP,
  author =       "Yuetsu Kodama and Hirohumi Sakane and Mitsuhisa Sato
                 and Hayato Yamana and Shuichi Sakai and Yoshinori
                 Yamaguchi",
  title =        "The {EM-X} parallel computer: architecture and basic
                 performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "14--23",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Woo:1995:SPC,
  author =       "Steven Cameron Woo and Moriyoshi Ohara and Evan Torrie
                 and Jaswinder Pal Singh and Anoop Gupta",
  title =        "The {SPLASH-2} programs: characterization and
                 methodological considerations",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "24--36",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Grahn:1995:ESS,
  author =       "H{\aa}kan Grahn and Per Stenstr{\"o}m",
  title =        "Efficient strategies for software-only protocols in
                 shared-memory multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "38--47",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lebeck:1995:DSI,
  author =       "Alvin R. Lebeck and David A. Wood",
  title =        "Dynamic self-invalidation: reducing coherence overhead
                 in shared-memory multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "48--59",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Dahlgren:1995:BPH,
  author =       "Fredrik Dahlgren",
  title =        "Boosting the performance of hybrid snooping cache
                 protocols",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "60--69",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Nowatzyk:1995:CNW,
  author =       "Andreas G. Nowatzyk and Michael C. Browne and Edmund
                 J. Kelly and Michael Parkin",
  title =        "{S}-connect: from networks of workstations to
                 supercomputer performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "71--82",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Varma:1995:DAD,
  author =       "Anujan Varma and Quinn Jacobson",
  title =        "Destage algorithms for disk arrays with non-volatile
                 caches",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "83--95",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Stoll:1995:EMP,
  author =       "Gordon Stoll and Bin Wei and Douglas Clark and Edward
                 W. Felten and Kai Li and Patrick Hanrahan",
  title =        "Evaluating multi-port frame buffer designs for a
                 mesh-connected multicomputer",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "96--105",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Nowatzyk:1995:CRD,
  author =       "Andreas G. Nowatzyk and Paul R. Prucnal",
  title =        "Are crossbars really dead?: the case for optical
                 multiprocessor interconnect systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "106--115",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Jourdan:1995:ECF,
  author =       "St{\'e}phan Jourdan and Pascal Sainrat and Daniel
                 Litaize",
  title =        "Exploring configurations of functional units in an
                 out-of-order superscalar processor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "117--125",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ando:1995:USE,
  author =       "Hideki Ando and Chikako Nakanishi and Tetsuya Hara and
                 Masao Nakaya",
  title =        "Unconstrained speculative execution with predicated
                 state buffering",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "126--137",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Mahlke:1995:CFP,
  author =       "Scott A. Mahlke and Richard E. Hank and James E.
                 McCormick and David I. August and Wen-Mei W. Hwu",
  title =        "A comparison of full and partial predicated execution
                 support for {ILP} processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "138--150",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Simone:1995:ITO,
  author =       "M. Simone and A. Essen and A. Ike and A.
                 Krishnamoorthy and T. Maruyama and N. Patkar and M.
                 Ramaswami and M. Shebanow and V. Thirumalaiswamy and D.
                 Tovey",
  title =        "Implementation trade-offs in using a restricted data
                 flow architecture in a high performance {RISC}
                 microprocessor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "151--162",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Diep:1995:PEP,
  author =       "Trung A. Diep and Christopher Nelson and John Paul
                 Shen",
  title =        "Performance evaluation of the {PowerPC 620}
                 microarchitecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "163--174",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Romer:1995:RTM,
  author =       "Theodore H. Romer and Wayne H. Ohlrich and Anna R.
                 Karlin and Brian N. Bershad",
  title =        "Reducing {TLB} and memory overhead using online
                 superpage promotion",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "176--187",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:1995:SIA,
  author =       "Zheng Zhang and Josep Torrellas",
  title =        "Speeding up irregular applications in shared-memory
                 multiprocessors: memory binding and group prefetching",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "188--199",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Anjan:1995:EFA,
  author =       "K. V. Anjan and Timothy Mark Pinkston",
  title =        "An efficient, fully adaptive deadlock recovery scheme:
                 {DISHA}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "201--210",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Shin:1995:AIH,
  author =       "Kang G. Shin and Stuart W. Daniel",
  title =        "Analysis and implementation of hybrid switching",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "211--219",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Dao:1995:CFC,
  author =       "Binh Vien Dao and Jose Duato and Sudhakar
                 Yalamanchili",
  title =        "Configurable flow control mechanisms for
                 fault-tolerant routing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "220--229",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Callahan:1995:NLO,
  author =       "Timothy Callahan and Seth Copen Goldstein",
  title =        "{NIFDY}: a low overhead, high throughput network
                 interface",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "230--241",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Peiron:1995:VMA,
  author =       "Montse Peiron and Mateo Valero and Eduard Ayguad{\'e}
                 and Tom{\'a}s Lang",
  title =        "Vector multiprocessors with arbitrated memory access",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "243--252",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kavi:1995:DCM,
  author =       "Krishna M. Kavi and A. R. Hurson and Phenil Patadia
                 and Elizabeth Abraham and Ponnarasu Shanmugam",
  title =        "Design of cache memories for multi-threaded dataflow
                 architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "253--264",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bodin:1995:SAE,
  author =       "Fran{\c{c}}ois Bodin and Andr{\'e} Seznec",
  title =        "Skewed associativity enhances performance
                 predictability",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "265--274",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Young:1995:CAS,
  author =       "Cliff Young and Nicolas Gloy and Michael D. Smith",
  title =        "A comparative analysis of schemes for correlated
                 branch prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "276--286",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Calder:1995:NCL,
  author =       "Brad Calder and Dirk Grunwald",
  title =        "Next cache line and set prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "287--296",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Karamcheti:1995:CAS,
  author =       "Vijay Karamcheti and Andrew A. Chien",
  title =        "A comparison of architectural support for messaging in
                 the {TMC CM-5} and the {Cray T3D}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "298--307",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Stricker:1995:OMS,
  author =       "T. Stricker and T. Gross",
  title =        "Optimizing memory system performance for communication
                 in parallel computers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "308--319",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Arpaci:1995:EEC,
  author =       "Remzi H. Arpaci and David E. Culler and Arvind
                 Krishnamurthy and Steve G. Steinberg and Katherine
                 Yelick",
  title =        "Empirical evaluation of the {CRAY-T$3$D}: a compiler
                 perspective",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "320--331",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Conte:1995:OIF,
  author =       "Thomas M. Conte and Kishore N. Menezes and Patrick M.
                 Mills and Burzin A. Patel",
  title =        "Optimization of instruction fetch mechanisms for high
                 issue rates",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "333--344",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Uhlig:1995:IFC,
  author =       "Richard Uhlig and David Nagle and Trevor Mudge and
                 Stuart Sechrest and Joel Emer",
  title =        "Instruction fetching: coping with code bloat",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "345--356",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1995:ICF,
  author =       "Dennis Lee and Jean-Loup Baer and Brad Calder and Dirk
                 Grunwald",
  title =        "Instruction cache fetch policies for speculative
                 execution",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "357--367",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Austin:1995:SDC,
  author =       "Todd M. Austin and Dionisios N. Pnevmatikatos and
                 Gurindar S. Sohi",
  title =        "Streamlining data cache access with fast address
                 calculation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "369--380",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:1995:CCA,
  author =       "Hong Wang and Tong Sun and Qing Yang",
  title =        "{CAT}---caching address tags: a technique for reducing
                 area cost of on-chip caches",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "381--390",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Tullsen:1995:SMM,
  author =       "Dean M. Tullsen and Susan J. Eggers and Henry M.
                 Levy",
  title =        "Simultaneous multithreading: maximizing on-chip
                 parallelism",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "392--403",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ho:1995:AVP,
  author =       "Richard C. Ho and C. Han Yang and Mark A. Horowitz and
                 David L. Dill",
  title =        "Architecture validation for processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "404--413",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sohi:1995:MP,
  author =       "Gurindar S. Sohi and Scott E. Breach and T. N.
                 Vijaykumar",
  title =        "Multiscalar processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "2",
  pages =        "414--425",
  month =        may,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:47 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Beckmann:1995:HPM,
  author =       "Carl J. Beckmann",
  title =        "{HTGL}: a program modelling language",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "3",
  pages =        "3--10",
  month =        jun,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:57 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lafitte:1995:SDH,
  author =       "Jean-Louis Lafitte",
  title =        "On structured data handling in parallel processing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "3",
  pages =        "11--18",
  month =        jun,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:57 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ulmann:1995:ESB,
  author =       "B. Ulmann",
  title =        "{o$ \mu $-EP-1}: a simple 32-bit architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "3",
  pages =        "19--24",
  month =        jun,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:57 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1995:INb,
  author =       "Mark Thorson",
  title =        "{Internet} Nuggets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "3",
  pages =        "25--27",
  month =        jun,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:57 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
%%% One-page book review: pages holds a single page number, so that
%%% styles print "page 28" rather than the degenerate range "pages 28--28".
@Article{Tabak:1995:CMH,
  author =       "Daniel Tabak",
  title =        "{{\em Cache and Memory Hierarchy Design: A
                 Performance-Directed Approach\/}} by {Steven A.
                 Przybylski}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "3",
  pages =        "28",
  month =        jun,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:57 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wilkes:1995:MWC,
  author =       "Maurice V. Wilkes",
  title =        "The memory wall and the {CMOS} end-point",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "4--6",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:1995:GMW,
  author =       "Eric E. Johnson",
  title =        "Graffiti on ``the memory wall''",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "7--8",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Afzal:1995:PMU,
  author =       "Tariq Afzal",
  title =        "Performance modeling using the {Motorola PowerPC}
                 timing simulator",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "9--18",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Parhami:1995:SMD,
  author =       "Behrooz Parhami",
  title =        "{SIMD} machines: do they have a significant future?",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "19--22",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Jain:1995:AAE,
  author =       "Ravi Jain and John Werth",
  title =        "Airdisks and {airRAID} (expanded extract): modeling
                 and scheduling periodic wireless data broadcast",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "23--28",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kontothanassis:1995:ESM,
  author =       "Leonidas I. Kontothanassis and Michael L. Scott",
  title =        "Efficient shared memory with minimal hardware
                 support",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "4",
  pages =        "29--35",
  month =        sep,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:13 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Gschwind:1995:VP,
  author =       "Michael K. Gschwind and Thomas J. Pietsch",
  title =        "Vector prefetching",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "1--7",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Karne:1995:OOC,
  author =       "Ramesh K. Karne",
  title =        "Object-oriented computer architectures for new
                 generation of applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "8--19",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1995:URA,
  author =       "Humayun Khalid",
  title =        "The unconventional replacement algorithms",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "20--26",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1995:TDS,
  author =       "Humayun Khalid",
  title =        "A trace-driven simulation methodology",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "27--33",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Mirghafori:1995:TSB,
  author =       "Nikki Mirghafori and Margret Jacoby and David
                 Patterson",
  title =        "Truth in {SPEC} benchmarks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "34--42",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1995:INc,
  author =       "Mark Thorson",
  title =        "{Internet} Nuggets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "23",
  number =       "5",
  pages =        "43--44",
  month =        dec,
  year =         "1995",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:20 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Mudge:1996:RPH,
  author          = {Trevor Mudge},
  title           = {Report on the panel: {``How Can Computer Architecture
                     Researchers Avoid Becoming the Society for Irreproducible
                     Results?''}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = mar,
  volume          = {24},
  number          = {1},
  pages           = {1--5},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:34 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Kwon:1996:COR,
  author          = {Oh-Young Kwon and Gi-Ho Park and Tack-Don Han},
  title           = {A compiler optimization to reduce execution time of loop nest},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = mar,
  volume          = {24},
  number          = {1},
  pages           = {6--11},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:34 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Thorson:1996:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = mar,
  volume          = {24},
  number          = {1},
  pages           = {12--16},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:34 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Tabak:1996:BRA,
  author          = {Daniel Tabak},
  title           = {Book Review: {{\em Alpha Implementations and
                     Architecture\/}} by {Dileep P. Bhandarkar}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = mar,
  volume          = {24},
  number          = {1},
  pages           = {17--18},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:34 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Evers:1996:UHB,
  author          = {Marius Evers and Po-Yung Chang and Yale N. Patt},
  title           = {Using hybrid branch predictors to improve branch prediction
                     accuracy in the presence of context switches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {3--11},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Gloy:1996:ADB,
  author          = {Nicolas Gloy and Cliff Young and J. Bradley Chen and
                     Michael D. Smith},
  title           = {An analysis of dynamic branch prediction schemes on system
                     workloads},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {12--21},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Sechrest:1996:CAD,
  author          = {Stuart Sechrest and Chih-Chieh Lee and Trevor Mudge},
  title           = {Correlation and aliasing in dynamic branch predictors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {22--32},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Reinhardt:1996:DHS,
  author          = {Steven K. Reinhardt and Robert W. Pfile and David A. Wood},
  title           = {Decoupled hardware support for distributed shared memory},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {34--43},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Yeung:1996:MMS,
  author          = {Donald Yeung and John Kubiatowicz and Anant Agarwal},
  title           = {{MGS}: a multigrain shared memory system},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {44--55},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Morin:1996:COB,
  author          = {Christine Morin and Alain Gefflaut and Michel Ban{\^a}tre
                     and Anne-Marie Kermarrec},
  title           = {{COMA}: an opportunity for building fault-tolerant scalable
                     shared memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {56--65},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Nayfeh:1996:EDA,
  author          = {Basem A. Nayfeh and Lance Hammond and Kunle Olukotun},
  title           = {Evaluation of design alternatives for a multiprocessor
                     microprocessor},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {67--77},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Burger:1996:MBL,
  author          = {Doug Burger and James R. Goodman and Alain K{\"a}gi},
  title           = {Memory bandwidth limitations of future microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {78--89},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Saulsbury:1996:MMW,
  author          = {Ashley Saulsbury and Fong Pong and Andreas Nowatzyk},
  title           = {Missing the memory wall: the case for processor\slash
                     memory integration},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {90--101},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Seznec:1996:DUP,
  author          = {Andr{\'e} Seznec},
  title           = {Don't use the page number, but a pointer to it},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {104--113},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Juan:1996:DBC,
  author          = {Toni Juan and Tom{\'a}s Lang and Juan J. Navarro},
  title           = {The difference-bit cache},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {114--120},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Iftode:1996:UAP,
  author          = {Liviu Iftode and Jaswinder Pal Singh and Kai Li},
  title           = {Understanding application performance on shared virtual
                     memory systems},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {122--133},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Holt:1996:AAB,
  author          = {Chris Holt and Jaswinder Pal Singh and John Hennessy},
  title           = {Application and architectural bottlenecks in large scale
                     distributed shared memory machines},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {134--145},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Wilson:1996:ICP,
  author          = {Kenneth M. Wilson and Kunle Olukotun and Mendel Rosenblum},
  title           = {Increasing cache port efficiency for dynamic superscalar
                     microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {147--157},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Austin:1996:HBA,
  author          = {Todd M. Austin and Gurindar S. Sohi},
  title           = {High-bandwidth address translation for multiple-issue
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {158--167},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Hu:1996:DDC,
  author          = {Yiming Hu and Qing Yang},
  title           = {{DCD}---disk caching disk: a new approach for boosting
                     {I/O} performance},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {169--178},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Maquelin:1996:PWC,
  author          = {Olivier Maquelin and Guang R. Gao and Herbert H. J. Hum and
                     Kevin B. Theobald and Xin-Min Tian},
  title           = {Polling watchdog: combining polling and interrupts for
                     efficient message handling},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {179--188},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Tullsen:1996:ECI,
  author          = {Dean M. Tullsen and Susan J. Eggers and Joel S. Emer and
                     Henry M. Levy and Jack L. Lo and Rebecca L. Stamm},
  title           = {Exploiting choice: instruction fetch and issue on an
                     implementable simultaneous multithreading processor},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {191--202},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Eickemeyer:1996:EMU,
  author          = {Richard J. Eickemeyer and Ross E. Johnson and Steven R.
                     Kunkel and Mark S. Squillante and Shiafun Liu},
  title           = {Evaluation of multithreaded uniprocessors for commercial
                     application environments},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {203--212},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Hara:1996:PCI,
  author          = {Tetsuya Hara and Hideki Ando and Chikako Nakanishi and
                     Masao Nakaya},
  title           = {Performance comparison of {ILP} machines with cycle time
                     evaluation},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {213--224},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Kim:1996:RCQ,
  author          = {Jae H. Kim and Andrew A. Chien},
  title           = {Rotating combined queueing {(RCQ)}: bandwidth and latency
                     guarantees in low-cost, high-performance networks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {226--236},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Rexford:1996:RAR,
  author          = {Jennifer Rexford and John Hall and Kang G. Shin},
  title           = {A router architecture for real-time point-to-point networks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {237--246},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Mukherjee:1996:CNI,
  author          = {Shubhendu S. Mukherjee and Babak Falsafi and Mark D. Hill
                     and David A. Wood},
  title           = {Coherent network interfaces for fine-grain communication},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {247--258},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Horowitz:1996:IMO,
  author          = {Mark Horowitz and Margaret Martonosi and Todd C. Mowry and
                     Michael D. Smith},
  title           = {Informing memory operations: providing memory performance
                     feedback in modern processors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {260--270},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Xia:1996:IPS,
  author          = {Chun Xia and Josep Torrellas},
  title           = {Instruction prefetching of systems codes with layout
                     optimized for reduced cache misses},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {271--282},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Choi:1996:CHS,
  author          = {Lynn Choi and Pen-Chung Yew},
  title           = {Compiler and hardware support for cache coherence in
                     large-scale multiprocessors: design considerations and
                     performance study},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {283--294},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Felten:1996:EEM,
  author          = {Edward W. Felten and Richard D. Alpert and Angelos Bilas
                     and Matthias A. Blumrich and Douglas W. Clark and Stefanos
                     N. Damianakis and Cezary Dubnicki and Liviu Iftode and Kai
                     Li},
  title           = {Early experience with message-passing on the {SHRIMP}
                     multicomputer},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {296--307},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Lovett:1996:SCN,
  author          = {Tom Lovett and Russell Clapp},
  title           = {{STiNG}: a {CC-NUMA} computer system for the commercial
                     marketplace},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = may,
  volume          = {24},
  number          = {2},
  pages           = {308--317},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:47 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Carretero:1996:MPD,
  author          = {J. Carretero and F. P{\'e}rez and P. de Miguel and F.
                     Garc{\'\i}a and L. Alonso},
  title           = {A massively parallel and distributed {I/O} subsystem},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = jun,
  volume          = {24},
  number          = {3},
  pages           = {1--8},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Ligon:1996:DLB,
  author          = {W. B. {Ligon III} and Daniel C. {Stanzione, Jr.}},
  title           = {Distributing and load-balancing for loops in scientific
                     applications},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = jun,
  volume          = {24},
  number          = {3},
  pages           = {9--17},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Belayneh:1996:DNBa,
  author          = {Samson Belayneh and David R. Kaeli},
  title           = {A discussion on non-blocking\slash lockup-free caches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = jun,
  volume          = {24},
  number          = {3},
  pages           = {18--25},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Thorson:1996:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = jun,
  volume          = {24},
  number          = {3},
  pages           = {26--32},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Paez-Monzon:1996:RPD,
  author          = {Gerard P{\'a}ez-Monz{\'o}n and Charles P{\'a}ez-Monz{\'o}n},
  title           = {The {RISC} processor {DMN-6}: a unified data-control flow
                     architecture},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {3--10},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the authors appear to use Spanish two-part surnames
%%% (Gomez Pulido, Sanchez Perez, Moreno Zamora) -- confirm against the
%%% published paper.  They are written in "Last, First" form so that BibTeX
%%% keeps both surname words together instead of treating the first surname
%%% as a given name.  The citation key is left unchanged so existing
%%% citations still resolve.
@Article{Pulido:1996:ETT,
  author          = {G{\'o}mez Pulido, J. A. and S{\'a}nchez P{\'e}rez, J. M.
                     and Moreno Zamora, J. A.},
  title           = {An educational tool for testing hierarchical multilevel
                     caches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {11--15},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
%%% Same authors and title as Belayneh:1996:DNBa (volume 24, number 3); this
%%% entry is the one-page item in number 4.  The pages field holds the single
%%% page number rather than a degenerate "16--16" range.
@Article{Belayneh:1996:DNBb,
  author          = {Samson Belayneh and David R. Kaeli},
  title           = {A discussion on non-blocking\slash lockup-free caches},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {16},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Rosenbaum:1996:AP,
  author          = {Mark Rosenbaum},
  title           = {Architectural potholes},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {17--18},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
%%% One-page item: the pages field holds the single page number rather than
%%% a degenerate "18--18" range.
@Article{Mashey:1996:AP,
  author          = {John Mashey},
  title           = {Architectural potholes},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {18},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Cockcroft:1996:P,
  author          = {Adrian Cockcroft},
  title           = {{I/O} potholes},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {18--19},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Ebrahim:1996:P,
  author          = {Zahir Ebrahim},
  title           = {{I/O} potholes},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {19--20},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Carlile:1996:IB,
  author          = {Brad Carlile},
  title           = {Interpreting benchmarks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {20--21},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
%%% One-page item: the pages field holds the single page number rather than
%%% a degenerate "21--21" range.
@Article{Chase:1996:RW,
  author          = {David Chase},
  title           = {Register windows},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {21},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{DeMone:1996:RWD,
  author          = {Paul W. DeMone},
  title           = {Register windows and delay slots},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = sep,
  volume          = {24},
  number          = {4},
  pages           = {21--22},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:13 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Rose:1996:CIT,
  author          = {Charlton D. Rose and J. Kelly Flanagan},
  title           = {Constructing instruction traces from cache-filtered address
                     traces {(CITCAT)}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = dec,
  volume          = {24},
  number          = {5},
  pages           = {1--8},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:20 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Hummel:1996:EDS,
  author          = {Susan Flynn Hummel},
  title           = {Efficient data sharing with conditional remote memory
                     transfers},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = dec,
  volume          = {24},
  number          = {5},
  pages           = {9--17},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:20 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Widigen:1996:EOR,
  author          = {Larry Widigen and Elliot Sowadsky and Kevin McGrath},
  title           = {Eliminating operand read latency},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = dec,
  volume          = {24},
  number          = {5},
  pages           = {18--22},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:20 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Machanick:1996:CSM,
  author          = {Philip Machanick},
  title           = {The case for {SRAM} main memory},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1996},
  month           = dec,
  volume          = {24},
  number          = {5},
  pages           = {23--30},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:41:20 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Bhandarkar:1997:RVC,
  author          = {Dileep Bhandarkar},
  title           = {{RISC} versus {CISC}: a tale of two chips},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1997},
  month           = mar,
  volume          = {25},
  number          = {1},
  pages           = {1--12},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Martin:1997:SCM,
  author          = {I. Mart{\'\i}n and F. Tirado},
  title           = {A {SIMD} computer for multigrid methods},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1997},
  month           = mar,
  volume          = {25},
  number          = {1},
  pages           = {13--18},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Weicker:1997:USB,
  author          = {Reinhold Weicker},
  title           = {On the use of {SPEC} benchmarks in computer architecture
                     research},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1997},
  month           = mar,
  volume          = {25},
  number          = {1},
  pages           = {19--22},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Mukherjee:1997:WSG,
  author          = {Shubhendu S. Mukherjee},
  title           = {What should graduate students know before joining a large
                     computer architecture project?},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1997},
  month           = mar,
  volume          = {25},
  number          = {1},
  pages           = {23--26},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Khalid:1997:NCR,
  author          = {Humayun Khalid},
  title           = {A new cache replacement scheme based on backpropagation
                     neural networks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {1997},
  month           = mar,
  volume          = {25},
  number          = {1},
  pages           = {27--33},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  acknowledgement = ack-nhfb,
}
@Article{Thorson:1997:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {1},
  pages           = {34--36},
  month           = mar,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:35 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Vajapeyam:1997:ISI,
  author          = {Sriram Vajapeyam and Tulika Mitra},
  title           = {Improving superscalar instruction dispatch and issue
                     by exploiting dynamic code sequences},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {1--12},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nair:1997:EIL,
  author          = {Ravi Nair and Martin E. Hopkins},
  title           = {Exploiting instruction level parallelism in processors
                     by caching scheduled groups},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {13--25},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ebcioglu:1997:DDC,
  author          = {Kemal Ebcio{\u{g}}lu and Erik R. Altman},
  title           = {{DAISY}: dynamic compilation for 100\% architectural
                     compatibility},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {26--37},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Pinkston:1997:DIN,
  author          = {Timothy Mark Pinkston and Sugath Warnakulasuriya},
  title           = {On deadlocks in interconnection networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {38--49},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Stunkel:1997:IMW,
  author          = {Craig B. Stunkel and Rajeev Sivaram and Dhabaleswar K.
                     Panda},
  title           = {Implementing multidestination worms in switch-based
                     parallel systems: architectural alternatives and their
                     impact},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {50--61},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Alvarez:1997:TMF,
  author          = {Guillermo A. Alvarez and Walter A. Burkhard and Flaviu
                     Cristian},
  title           = {Tolerating multiple failures in {RAID} architectures
                     with optimal storage and uniform declustering},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {62--72},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Teodosiu:1997:HFC,
  author          = {Dan Teodosiu and Joel Baxter and Kinshuk Govil and
                     John Chapin and Mendel Rosenblum and Mark Horowitz},
  title           = {Hardware fault containment in scalable shared-memory
                     multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {73--84},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Martin:1997:ECL,
  author          = {Richard P. Martin and Amin M. Vahdat and David E.
                     Culler and Thomas E. Anderson},
  title           = {Effects of communication latency, overhead, and
                     bandwidth in a cluster architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {85--97},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Weber:1997:MIA,
  author          = {Wolf-Dietrich Weber and Stephen Gold and Pat Helland
                     and Takeshi Shimizu and Thomas Wicki and Winfried
                     Wilcke},
  title           = {The {Mercury Interconnect Architecture}: a
                     cost-effective infrastructure for high-performance
                     servers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {98--107},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hakura:1997:DAC,
  author          = {Ziyad S. Hakura and Anoop Gupta},
  title           = {The design and analysis of a cache architecture for
                     texture mapping},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {108--120},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wilson:1997:DHB,
  author          = {Kenneth M. Wilson and Kunle Olukotun},
  title           = {Designing high bandwidth on-chip caches},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {121--132},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Farkas:1997:MSD,
  author          = {Keith I. Farkas and Paul Chow and Norman P. Jouppi and
                     Zvonko Vranesic},
  title           = {Memory-system design considerations for
                     dynamically-scheduled processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {133--143},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ranganathan:1997:ISP,
  author          = {Parthasarathy Ranganathan and Vijay S. Pai and Hazim
                     Abdel-Shafi and Sarita V. Adve},
  title           = {The interaction of software prefetching with {ILP}
                     processors in shared-memory systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {144--156},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kontothanassis:1997:VBS,
  author          = {Leonidas Kontothanassis and Galen Hunt and Robert
                     Stets and Nikolaos Hardavellas and Micha{\l} Cierniak
                     and Srinivasan Parthasarathy and Wagner {Meira, Jr.}
                     and Sandhya Dwarkadas and Michael Scott},
  title           = {{VM}-based shared memory on low-latency,
                     remote-memory-access networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {157--169},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kagi:1997:ESL,
  author          = {Alain K{\"a}gi and Doug Burger and James R. Goodman},
  title           = {Efficient synchronization: let them eat {QOLB}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {170--180},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Moshovos:1997:DSS,
  author          = {Andreas Moshovos and Scott E. Breach and T. N.
                     Vijaykumar and Gurindar S. Sohi},
  title           = {Dynamic speculation and synchronization of data
                     dependences},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {181--193},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sodani:1997:DIR,
  author          = {Avinash Sodani and Gurindar S. Sohi},
  title           = {Dynamic instruction reuse},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {194--205},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Palacharla:1997:CES,
  author          = {Subbarao Palacharla and Norman P. Jouppi and J. E.
                     Smith},
  title           = {Complexity-effective superscalar processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {206--218},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Michael:1997:CCA,
  author          = {Maged M. Michael and Ashwini K. Nanda and Beng-Hong
                     Lim and Michael L. Scott},
  title           = {Coherence controller architectures for {SMP}-based
                     {CC-NUMA} multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {219--228},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Falsafi:1997:RND,
  author          = {Babak Falsafi and David A. Wood},
  title           = {Reactive {NUMA}: a design for unifying {S-COMA} and
                     {CC-NUMA}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {229--240},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Laudon:1997:SOC,
  author          = {James Laudon and Daniel Lenoski},
  title           = {The {SGI Origin}: a {ccNUMA} highly scalable server},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {241--251},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Joseph:1997:PUM,
  author          = {Doug Joseph and Dirk Grunwald},
  title           = {Prefetching using {Markov} predictors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {252--263},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Santhanam:1997:DPH,
  author          = {Vatsa Santhanam and Edward H. Gornish and Wei-Chung
                     Hsu},
  title           = {Data prefetching on the {HP PA-8000}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {264--273},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chang:1997:TPI,
  author          = {Po-Yung Chang and Eric Hao and Yale N. Patt},
  title           = {Target prediction for indirect jumps},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {274--283},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sprangle:1997:APM,
  author          = {Eric Sprangle and Robert S. Chappell and Mitch Alsup
                     and Yale N. Patt},
  title           = {The agree predictor: a mechanism for reducing negative
                     branch history interference},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {284--291},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Michaud:1997:TCC,
  author          = {Pierre Michaud and Andr{\'e} Seznec and Richard
                     Uhlig},
  title           = {Trading conflict and capacity aliasing in conditional
                     branch predictors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {292--303},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Emer:1997:LDP,
  author          = {Joel Emer and Nikolas Gloy},
  title           = {A language for describing predictors and its
                     application to automatic synthesis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {304--314},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Johnson:1997:RTA,
  author          = {Teresa L. Johnson and Wen-mei W. Hwu},
  title           = {Run-time adaptive cache hierarchy management via
                     reference analysis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {315--326},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fromm:1997:EEI,
  author          = {Richard Fromm and Stylianos Perissakis and Neal
                     Cardwell and Christoforos Kozyrakis and Bruce McGaughy
                     and David Patterson and Tom Anderson and Katherine
                     Yelick},
  title           = {The energy efficiency of {IRAM} architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {327--337},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Burger:1997:DA,
  author          = {Doug Burger and Stefanos Kaxiras and James R.
                     Goodman},
  title           = {{DataScalar} architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {2},
  pages           = {338--349},
  month           = may,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wilkes:1997:CLS,
  author          = {Maurice Wilkes and Andrew Hopper},
  title           = {The collapsed {LAN}: a solution to a bandwidth
                     problem?},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {3},
  pages           = {1--5},
  month           = jun,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Jokinen:1997:CDP,
  author          = {Tommi Jokinen and Chia-Jiu Wang},
  title           = {Cache design with path balancing table, skewing and
                     indirect tags},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {3},
  pages           = {6--12},
  month           = jun,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Burger:1997:STS,
  author          = {Doug Burger and Todd M. Austin},
  title           = {The {SimpleScalar} tool set, version 2.0},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {3},
  pages           = {13--25},
  month           = jun,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1997:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {3},
  pages           = {26--27},
  month           = jun,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:58 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{VanMeter:1997:RCL,
  author          = {Rodney {Van Meter} and Greg Finn and Steve Hotz and
                     Dave Dyer},
  title           = {Response to the collapsed {LAN}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {1--2},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hu:1997:OES,
  author          = {Weiwu Hu and Peisu Xia},
  title           = {Out-of-order execution in sequentially consistent
                     shared-memory systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {3--10},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Khalid:1997:NTS,
  author          = {Humayun Khalid},
  title           = {A novel trace sampling technique},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {11--16},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Khalid:1997:PKC,
  author          = {Humayun Khalid},
  title           = {Performance of the {KORA-2} cache replacement scheme},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {17--21},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Jutla:1997:IAP,
  author          = {D. N. Jutla and P. Bodorik},
  title           = {Improving applications performance: a memory model and
                     cache architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {22--29},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ulmann:1997:NEP,
  author          = {B. Ulmann},
  title           = {{NICE}: an elegant and powerful 32-bit architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {30--35},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1997:INc,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {4},
  pages           = {36--41},
  month           = sep,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:14 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Pai:1997:RRS,
  author          = {Vijay S. Pai and Parthasarathy Ranganathan and Sarita
                     V. Adve},
  title           = {{RSIM}: {Rice} simulator for {ILP} multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {5},
  pages           = {1},
  month           = dec,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:21 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Shi:1997:IID,
  author          = {Weisong Shi and Weiwu Hu and Ming Zhu},
  title           = {An innovative implementation for directory-based cache
                     coherence in shared memory multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {5},
  pages           = {2--9},
  month           = dec,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:21 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1997:INd,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {25},
  number          = {5},
  pages           = {10--14},
  month           = dec,
  year            = {1997},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:21 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ulmann:1998:ILE,
  author          = {B. Ulmann},
  title           = {Instruction looping, an extension to conditional
                     execution},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {1},
  pages           = {3--4},
  month           = mar,
  year            = {1998},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1216461.1216462},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:32 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The following article describes an easy to implement
                     but very powerful extension to simple conditional
                     execution based program flow control as used for
                     example in the ARM RISC processors and others.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Haring:1998:IWP,
  author          = {G{\"u}nter Haring and Christoph Lindemann and Martin
                     Reiser},
  title           = {International workshop performance evaluation ---
                     origins and directions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {1},
  pages           = {5--6},
  month           = mar,
  year            = {1998},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1216461.1216463},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:32 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Performance Evaluation is a discipline of Computer
                     Science for some thirty years. It seems time to take
                     stock of what we were doing. That is, provide answers
                     to the following questions: $\bullet$ What are its
                     scientific contributions? $\bullet$ What is its
                     relevance in industry and business? $\bullet$ What is
                     its standing in academia? $\bullet$ Where is the field
                     headed? $\bullet$ What are its success stories and
                     failures? $\bullet$ What are its current burning
                     questions?},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Munsil:1998:RSU,
  author          = {Wes Munsil and Chia-Jiu Wang},
  title           = {Reducing stack usage in {Java} bytecode execution},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {1},
  pages           = {7--11},
  month           = mar,
  year            = {1998},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1216461.1216464},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:32 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {For many years, the Tomasulo method of dynamically
                     scheduling instructions for execution in a load/store
                     processor has been known and used. This paper presents
                     an adaptation of the Tomasulo method to a stack-based
                     processor architecture, and illustrates its use in a
                     software simulator of a subset of the Java Virtual
                     Machine. Experimental results show that the adapted
                     Tomasulo method reduces stack usage, in some cases
                     eliminating it altogether. This method should be of
                     interest to computer architects and those involved in
                     the implementation and use of the Java programming
                     language.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1998:INaa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {1},
  pages           = {12--17},
  month           = mar,
  year            = {1998},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1216461.1216465},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 12:06:32 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This column consists of selected traffic from the
                     comp.arch newsgroup, a forum for discussion of computer
                     architecture on Internet --- an international computer
                     network. As always, the opinions expressed in this
                     column are the personal views of the authors, and do
                     not necessarily represent the institutions to which
                     they are affiliated. Text which sets the context of a
                     message appears in italics; this is usually text the
                     author has quoted from earlier messages. The code-like
                     expressions below the authors' names are their
                     addresses on Internet.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Moudgill:1998:TFS,
  author          = {Mayan Moudgill},
  title           = {Techniques for fast simulation of associative cache
                     directories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {2},
  pages           = {1--8},
  month           = may,
  year            = {1998},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chung:1998:LBC,
  author          = {Byung-Kwon Chung and Jih-Kwon Peir},
  title           = {{LRU}-based column-associative caches},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {2},
  pages           = {9--17},
  month           = may,
  year            = {1998},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:1998:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {26},
  number          = {2},
  pages           = {18--22},
  month           = may,
  year            = {1998},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:48 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Barroso:1998:MSC,
  author =       "Luiz Andr{\'e} Barroso and Kourosh Gharachorloo and
                 Edouard Bugnion",
  title =        "Memory system characterization of commercial
                 workloads",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "3--14",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Keeton:1998:PCQ,
  author =       "Kimberly Keeton and David A. Patterson and Yong Qiang
                 He and Roger C. Raphael and Walter E. Baker",
  title =        "Performance characterization of a {Quad Pentium Pro
                 SMP} using {OLTP} workloads",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "15--26",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:1998:ECD,
  author =       "Dennis C. Lee and Patrick J. Crowley and Jean-Loup
                 Baer and Thomas E. Anderson and Brian N. Bershad",
  title =        "Execution characteristics of desktop applications on
                 {Windows NT}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "27--38",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lo:1998:ADW,
  author =       "Jack L. Lo and Luiz Andr{\'e} Barroso and Susan J.
                 Eggers and Kourosh Gharachorloo and Henry M. Levy and
                 Sujay S. Parekh",
  title =        "An analysis of database workload performance on
                 simultaneous multithreaded processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "39--50",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Evers:1998:ACP,
  author =       "Marius Evers and Sanjay J. Patel and Robert S.
                 Chappell and Yale N. Patt",
  title =        "An analysis of correlation and predictability: what
                 makes two-level branch predictors work",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "52--61",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
%%% NOTE(review): the third author's given name is entered as "Shlomo";
%%% some indexes may list it as "Sholomo" --- confirm against the paper.
@Article{Federovsky:1998:BPB,
  author =       "Eitan Federovsky and Meir Feder and Shlomo Weiss",
  title =        "Branch prediction based on universal data compression
                 algorithms",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "62--72",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sazeides:1998:MPP,
  author =       "Yiannakis Sazeides and James E. Smith",
  title =        "Modeling program predictability",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "73--84",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Cox:1998:MLT,
  author =       "Michael Cox and Narendra Bhandari and Michael Shantz",
  title =        "Multi-level texture caching for {$3$D} graphics
                 hardware",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "86--97",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Eberle:1998:SQC,
  author =       "Hans Eberle and Erwin Oertli",
  title =        "{Switcherland}: a {QoS} communication architecture for
                 workstation clusters",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "98--108",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Alvarez:1998:DDA,
  author =       "Guillermo A. Alvarez and Walter A. Burkhard and Larry
                 J. Stockmeyer and Flaviu Cristian",
  title =        "Declustered disk array architectures with optimal and
                 near-optimal parallelism",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "109--120",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Grunwald:1998:CES,
  author =       "Dirk Grunwald and Artur Klauser and Srilatha Manne and
                 Andrew Pleszkun",
  title =        "Confidence estimation for speculation control",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "122--131",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Manne:1998:PGS,
  author =       "Srilatha Manne and Artur Klauser and Dirk Grunwald",
  title =        "Pipeline gating: speculation control for energy
                 reduction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "132--141",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Chrysos:1998:MDP,
  author =       "George Z. Chrysos and Joel S. Emer",
  title =        "Memory dependence prediction using store sets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "142--153",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Juan:1998:DHL,
  author =       "Toni Juan and Sanji Sanjeevan and Juan J. Navarro",
  title =        "Dynamic history-length fitting: a third level of
                 adaptivity for branch prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "155--166",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Driesen:1998:AIB,
  author =       "Karel Driesen and Urs H{\"o}lzle",
  title =        "Accurate indirect branch prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "167--178",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Mukherjee:1998:UPA,
  author =       "Shubhendu S. Mukherjee and Mark D. Hill",
  title =        "Using prediction to accelerate coherence protocols",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "179--190",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Oskin:1998:APC,
  author =       "Mark Oskin and Frederic T. Chong and Timothy
                 Sherwood",
  title =        "Active pages: a computation model for intelligent
                 memory",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "192--203",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Swanson:1998:ITR,
  author =       "Mark Swanson and Leigh Stoller and John Carter",
  title =        "Increasing {TLB} reach using superpages backed by
                 shadow memory",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "204--213",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Qiu:1998:ODA,
  author =       "Xiaogang Qiu and Michel Dubois",
  title =        "Options for dynamic address translation in {COMAs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "214--225",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{August:1998:IPS,
  author =       "David I. August and Daniel A. Connors and Scott A.
                 Mahlke and John W. Sias and Kevin M. Crozier and
                 Ben-Chung Cheng and Patrick R. Eaton and Qudus B.
                 Olaniran and Wen-mei W. Hwu",
  title =        "Integrated predicated and speculative execution in the
                 {IMPACT EPIC} architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "227--237",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wallace:1998:TMP,
  author =       "Steven Wallace and Brad Calder and Dean M. Tullsen",
  title =        "Threaded multiple path execution",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "238--249",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Klauser:1998:SEE,
  author =       "Artur Klauser and Abhijit Paithankar and Dirk
                 Grunwald",
  title =        "Selective eager execution on the {PolyPath}
                 architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "250--259",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Patel:1998:ITC,
  author =       "Sanjay Jeram Patel and Marius Evers and Yale N. Patt",
  title =        "Improving trace cache effectiveness with branch
                 promotion and trace packing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "262--271",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Gabbay:1998:EIF,
  author =       "Freddy Gabbay and Avi Mendelson",
  title =        "The effect of instruction fetch bandwidth on value
                 prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "272--281",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Albonesi:1998:DIC,
  author =       "David H. Albonesi",
  title =        "Dynamic {IPC\slash clock} rate optimization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "282--292",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:1998:PMC,
  author =       "Yinong Zhang and George B. {Adams III}",
  title =        "Performance modeling and code partitioning for the
                 {DS} architecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "293--304",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Keckler:1998:EFG,
  author =       "Stephen W. Keckler and William J. Dally and Daniel
                 Maskit and Nicholas P. Carter and Andrew Chang and Whay
                 S. Lee",
  title =        "Exploiting fine-grain thread level parallelism on the
                 {MIT} multi-{ALU} processor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "306--317",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Abandah:1998:EAT,
  author =       "Gheith A. Abandah and Edward S. Davidson",
  title =        "Effects of architectural and technological advances on
                 the {HP\slash Convex Exemplar}'s memory and
                 communication performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "318--329",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Blumrich:1998:DCS,
  author =       "Matthias A. Blumrich and Richard D. Alpert and Yuqun
                 Chen and Douglas W. Clark and Stefanos N. Damianakis
                 and Cezary Dubnicki and Edward W. Felten and Liviu
                 Iftode and Kai Li and Margaret Martonosi and Robert A.
                 Shillner",
  title =        "Design choices in the {SHRIMP} system: an empirical
                 study",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "330--341",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Soundararajan:1998:FUM,
  author =       "Vijayaraghavan Soundararajan and Mark Heinrich and Ben
                 Verghese and Kourosh Gharachorloo and Anoop Gupta and
                 John Hennessy",
  title =        "Flexible use of memory for replication\slash migration
                 in cache-coherent {DSM} multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "342--355",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kumar:1998:ESL,
  author =       "Sanjeev Kumar and Christopher Wilkerson",
  title =        "Exploiting spatial locality in data caches using
                 spatial footprints",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "357--368",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lynch:1998:LLL,
  author =       "William L. Lynch and Gary Lauterbach and Joseph I.
                 Chamdani",
  title =        "Low load latency through sum-addressed memory
                 {(SAM)}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "369--379",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sorin:1998:AES,
  author =       "Daniel J. Sorin and Vijay S. Pai and Sarita V. Adve
                 and Mary K. Vernon and David A. Wood",
  title =        "Analytic evaluation of shared-memory systems with
                 {ILP} processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "3",
  pages =        "380--391",
  month =        jun,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:58 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Golla:1998:CEB,
  author =       "Prasad N. Golla and Eric C. Lin",
  title =        "A comparison of the effect of branch prediction on
                 multithreaded and scalar architectures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "4",
  pages =        "3--11",
  month =        sep,
  year =         "1998",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1216475.1216476",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:40 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Speculative instructions execution requires dynamic
                 branch predictors to increase the performance of a
                 processor by executing from predicted branch target
                 routines. Conventional Scalar architectures such as the
                 Superscalar or Multiscalar architecture executes from a
                 single stream, while a Multithreaded architecture
                 executes from multiple streams at a time. Several
                 aggressive branch predictors have been proposed with
                 high prediction accuracies. Unfortunately, none of the
                 branch predictors can provide 100\% accuracy.
                 Therefore, there is an inherent limitation on
                 speculative execution in real implementation. In this
                 paper, we show that Multithreaded architecture is a
                 better candidate for utilizing speculative execution
                 than Scalar architectures. Generally the branch
                 prediction performance degradation is compounded for
                 larger window sizes on Scalar architectures, while for
                 a Multithreaded architecture, by increasing the number
                 of executing threads, we could sustain a higher
                 performance for a large aggregated speculative window
                 size. Hence, heavier workloads may increase performance
                 and utilization for Multithreaded architectures. We
                 present analytical and simulation results to support
                 our argument.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1998:INc,
  author =       "Mark Thorson",
  title =        "{Internet} nuggets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "4",
  pages =        "12--16",
  month =        sep,
  year =         "1998",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1216475.1216477",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:40 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This column consists of selected traffic from the
                 comp.arch newsgroup, a forum for discussion of computer
                 architecture on Internet---an international computer
                 network. As always, the opinions expressed in this
                 column are the personal views of the authors, and do
                 not necessarily represent the institutions to which
                 they are affiliated. Text which sets the context of a
                 message appears in italics; this is usually text the
                 author has quoted from earlier messages. The code-like
                 expressions below the authors' names are their
                 addresses on Internet.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
%%% The control space after "vs." keeps TeX from treating the
%%% abbreviation's period as the end of a sentence.
@Article{Machanick:1998:SVL,
  author =       "Philip Machanick",
  title =        "Streaming vs.\ latency in information mass-transit",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "5",
  pages =        "4--6",
  month =        dec,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:21 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lafitte:1998:GMD,
  author =       "Jean-Louis Lafitte",
  title =        "A generalized mapping device to help memory latency",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "5",
  pages =        "7--13",
  month =        dec,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:21 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ashraf:1998:IRM,
  author =       "Farooq Ashraf and Mostafa Abd-El-Barr and Khalid
                 Al-Tawil",
  title =        "Introduction to routing in multicomputer networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "5",
  pages =        "14--21",
  month =        dec,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:21 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wilmot:1998:DTM,
  author =       "Dick Wilmot",
  title =        "Data threaded microarchitecture",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "26",
  number =       "5",
  pages =        "22--32",
  month =        dec,
  year =         "1998",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:21 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Yuen:1999:SR,
  author =       "C. K. Yuen",
  title =        "Stack and {RISC}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "3--9",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Baylor:1999:USS,
  author =       "Sandra Johnson Baylor",
  title =        "Unified scalable shared memory architectures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "10--21",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{DeWitt:1999:PTL,
  author =       "Anthony DeWitt and Thomas Gross",
  title =        "The potential of thread-level speculation based on
                 value profiling",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "22--22",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kalamatianos:1999:IAI,
  author =       "John Kalamatianos and David R. Kaeli",
  title =        "Improving the accuracy of indirect branch prediction
                 via branch classification",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "23--26",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ju:1999:PMD,
  author =       "Roy Dz-ching Ju and Jean-Fran{\c{c}}ois Collard and
                 Karim Oukbir",
  title =        "Probabilistic memory disambiguation and its
                 application to data speculation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "27--30",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Postiff:1999:LIL,
  author =       "Matthew A. Postiff and David A. Greene and Gary S.
                 Tyson and Trevor N. Mudge",
  title =        "The limits of instruction level parallelism in
                 {SPEC95} applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "31--34",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Yang:1999:LMJ,
  author =       "Byung-Sun Yang and Junpyo Lee and Jinpyo Park and
                 Soo-Mook Moon and Kemal Ebcio{\u{g}}lu and Erik
                 Altman",
  title =        "Lightweight monitor for {Java VM}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "35--38",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Rao:1999:SAU,
  author =       "Amit Rao and Santosh Pande",
  title =        "Storage assignment using expression tree
                 transformations to generate compact and efficient {DSP}
                 code",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "39--42",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Flautner:1999:HLS,
  author =       "Kriszti{\'a}n Flautner and Gary S. Tyson and Trevor
                 Mudge",
  title =        "A high level simulator integrated with the {Mirv}
                 compiler",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "43--46",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Casse:1999:UAI,
  author =       "H. Cass{\'e} and L. F{\'e}raud and C. Rochange and P.
                 Sainrat",
  title =        "Using the abstract interpretation technique for static
                 pointer analysis",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "47--50",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bahar:1999:CSC,
  author =       "Iris Bahar and Brad Calder and Dirk Grunwald",
  title =        "A comparison of software code reordering and victim
                 buffers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "51--54",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Carr:1999:ISP,
  author =       "Steve Carr and Philip Sweany",
  title =        "Improving software pipelining with hardware support
                 for self-spatial loads",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "27",
  number =       "1",
  pages =        "55--58",
  month =        mar,
  year =         "1999",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:35 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Barua:1999:MCM,
author = "Rajeev Barua and Walter Lee and Saman Amarasinghe and
Anant Agarwal",
title = "{Maps}: a compiler-managed memory system for {Raw}
machines",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "4--15",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vajapeyam:1999:DVM,
author = "Sriram Vajapeyam and P. J. Joseph and Tulika Mitra",
title = "Dynamic vectorization: a mechanism for exploiting
far-flung {ILP} in ordinary programs",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "16--27",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goldstein:1999:PCP,
author = "Seth Copen Goldstein and Herman Schmit and Matthew Moe
and Mihai Budiu and Srihari Cadambi and R. Reed Taylor
and Ronald Laufer",
title = "{PipeRench}: a co\slash processor for streaming
multimedia acceleration",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "28--39",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yoaz:1999:STI,
author = "Adi Yoaz and Mattan Erez and Ronny Ronen and Stephan
Jourdan",
title = "Speculation techniques for improving load related
instruction scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "42--53",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bekerman:1999:CLA,
author = "Michael Bekerman and Stephan Jourdan and Ronny Ronen
and Gilad Kirshenboim and Lihu Rappoport and Adi Yoaz
and Uri Weiser",
title = "Correlated load-address predictors",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "54--63",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Calder:1999:SVP,
author = "Brad Calder and Glenn Reinman and Dean M. Tullsen",
title = "Selective value prediction",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "64--74",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Qiu:1999:TLM,
author = "Xiaogang Qiu and Michel Dubois",
title = "Tolerating late memory traps in {ILP} processors",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "76--87",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Luk:1999:MFE,
author = "Chi-Keung Luk and Todd C. Mowry",
title = "Memory forwarding: enabling aggressive layout
optimizations by guaranteeing the safety of data
relocation",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "88--99",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cho:1999:DLV,
author = "Sangyeun Cho and Pen-Chung Yew and Gyungho Lee",
title = "Decoupling local variable accesses in a wide-issue
superscalar processor",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "100--110",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Roth:1999:EJP,
author = "Amir Roth and Gurindar S. Sohi",
title = "Effective jump-pointer prefetching for linked data
structures",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "111--121",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ranganathan:1999:PIV,
author = "Parthasarathy Ranganathan and Sarita Adve and Norman
P. Jouppi",
title = "Performance of image and video processing with
general-purpose processors and media {ISA} extensions",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "124--135",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Merten:1999:HDP,
author = "Matthew C. Merten and Andrew R. Trick and Christopher
N. George and John C. Gyllenhaal and Wen-mei W. Hwu",
title = "A hardware-driven profiling scheme for identifying
program hot spots to support runtime optimization",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "136--147",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shen:1999:CRF,
author = "Xiaowei Shen and Arvind and Larry Rudolph",
title = "Commit-reconcile \& fences {(CRF)}: a new memory
model for architects and compiler writers",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "150--161",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gniady:1999:SIR,
author = "Chris Gniady and Babak Falsafi and T. N. Vijaykumar",
title = "Is {SC + ILP = RC}?",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "162--171",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Instruction level parallelism (ILP); release
consistency (RC); sequential consistency (SC)",
}
@Article{Lai:1999:MSP,
author = "An-Chow Lai and Babak Falsafi",
title = "Memory sharing predictor: the key to a speculative
coherent {DSM}",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "172--183",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chappell:1999:SSM,
author = "Robert S. Chappell and Jared Stark and Sangwook P. Kim
and Steven K. Reinhardt and Yale N. Patt",
title = "Simultaneous subordinate microthreading {(SSMT)}",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "186--195",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Black:1999:BBT,
author = "Bryan Black and Bohuslav Rychlik and John Paul Shen",
title = "The block-based trace cache",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "196--207",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{August:1999:PDL,
author = "David I. August and John W. Sias and Jean-Michel
Puiatti and Scott A. Mahlke and Daniel A. Connors and
Kevin M. Crozier and Wen-mei W. Hwu",
title = "The program decision logic approach to predicated
execution",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "208--219",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cuppu:1999:PCC,
author = "Vinodh Cuppu and Bruce Jacob and Brian Davis and
Trevor Mudge",
title = "A performance comparison of contemporary {DRAM}
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "222--233",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reinman:1999:SFE,
author = "Glenn Reinman and Todd Austin and Brad Calder",
title = "A scalable front-end architecture for fast instruction
delivery",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "234--245",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:1999:AEA,
author = "Seongwoo Kim and Arun K. Somani",
title = "Area efficient architectures for information integrity
in cache memories",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "246--255",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nakra:1999:VPV,
author = "Tarun Nakra and Rajiv Gupta and Mary Lou Soffa",
title = "Value prediction in {VLIW} machines",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "258--269",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tullsen:1999:SVP,
author = "Dean M. Tullsen and John S. Seng",
title = "Storageless value prediction using prior register
values",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "270--279",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bilas:1999:UNI,
author = "Angelos Bilas and Cheng Liao and Jaswinder Pal Singh",
title = "Using network interface support to avoid asynchronous
protocol processing in shared virtual memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "282--293",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bilir:1999:MSN,
author = "E. Ender Bilir and Ross M. Dickson and Ying Hu and
Manoj Plakal and Daniel J. Sorin and Mark D. Hill and
David A. Wood",
title = "Multicast snooping: a new coherence method using a
multicast address network",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "294--304",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jiang:1999:SAP,
author = "Dongming Jiang and Jaswinder Pal Singh",
title = "Scaling application performance on a cache-coherent
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "2",
pages = "305--316",
month = may,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:1999:MSF,
author = "Anonymous",
title = "In memoriam---{SIGARCH} founder: {Caxton C. Foster}",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "1--3",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hwang:1999:SSI,
author = "Seung H. Hwang and Gwan S. Choi",
title = "Selective-set-invalidation {(SSI)} for
soft-error-resilient cache architecture",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "4--9",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheng:1999:DHP,
author = "Peng Cheng and Hai Jin and Jiangling Zhang",
title = "Design of high performance {RAID} in real-time
system",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "10--17",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuen:1999:ASC,
author = "C. K. Yuen",
title = "Architectural support for the cache based vector
computation",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "18--23",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Driker:1999:DCC,
author = "Benjamin Driker",
title = "Disbursed control computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "24--31",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1999:PEM,
author = "Humayun Khalid",
title = "Performance evaluation of multimedia systems with
{MPEG-2} bitstreams",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "32--37",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1999:MPE,
author = "Humayun Khalid",
title = "A methodology for performance evaluation of systems
with large emulation code",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "38--42",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1999:TMB,
author = "Humayun Khalid",
title = "Tracing multimedia benchmarks with five degrees of
validation",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "43--48",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khalid:1999:PET,
author = "Humayun Khalid",
title = "Performance evaluation of two operating systems",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "49--52",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1999:INa,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "3",
pages = "53--60",
month = jun,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Machanick:1999:CRA,
author = "Phillip Machanick",
title = "Correction to {RAMpage ASPLOS} paper",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "4",
pages = "2--5",
month = sep,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:14 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shahhoseini:1999:ABP,
author = "H. S. Shahhoseini and M. Naderi and S. Nemati",
title = "Achieving the best performance on superscalar
processors",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "4",
pages = "6--11",
month = sep,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:14 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1999:INb,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "4",
pages = "12--14",
month = sep,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:14 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Torrant:1999:SMS,
author = "Marc Torrant and Muhammad Shaaban and Roy Czernikowski
and Ken Hsu",
title = "A simultaneous multithreading simulator",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "5",
pages = "1--5",
month = dec,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:1999:INc,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "27",
number = "5",
pages = "6--10",
month = dec,
year = "1999",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dai:2000:LSO,
author = "Min Dai and Christine Eisenbeis and Sid-Ahmed-Ali
Touati",
title = "Load-store optimization for software pipelining",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "3--10",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Clauss:2000:AML,
author = "Philippe Clauss and Beno{\^\i}t Meister",
title = "Automatic memory layout transformations to optimize
spatial locality in parameterized loop nests",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "11--19",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kreaseck:2000:LTB,
author = "Barbara Kreaseck and Dean Tullsen and Brad Calder",
title = "Limits of task-based parallelism in irregular
applications",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "20--20",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:2000:RVC,
author = "Junpyo Lee and Byung-Sun Yang and Suhyun Kim and Kemal
Ebcio{\u{g}}lu and Erik Altman and Seungil Lee and Yoo
C. Chung and Heungbok Lee and Je Hyung Lee and Soo-Mook
Moon",
title = "Reducing virtual call overheads in a {Java VM}
just-in-time compiler",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "21--33",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sadler:2000:APE,
author = "Chris Sadler and Sandeep K. S. Gupta and Rohit
Bhatia",
title = "Applying predication to efficiently handle runtime
class testing",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "34--42",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bermudo:2000:OCM,
author = "Nerina Bermudo and Xavier Vera and Antonio
Gonz{\'a}lez and Josep Llosa",
title = "Optimizing cache miss equations polyhedra",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "43--52",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Unger:2000:CCA,
author = "A. Unger and E. Zehendner and Th. Ungerer",
title = "A combined compiler and architecture technique to
control multithreaded execution of branches and loop
iterations",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "53--61",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Aydin:2000:UCL,
author = "Hakan Aydin and David Kaeli",
title = "Using cache line coloring to perform aggressive
procedure inlining",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "62--71",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tyagi:2000:COP,
author = "Akhilesh Tyagi and Gyungho Lee",
title = "A compiler optimization paradigm for dynamic energy
management",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "72--76",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2000:INa,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "1",
pages = "77--78",
month = mar,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Steffan:2000:SAT,
author = "J. Greggory Steffan and Christopher B. Colohan and
Antonia Zhai and Todd C. Mowry",
title = "A scalable approach to thread-level speculation",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "1--12",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cintra:2000:ASS,
author = "Marcelo Cintra and Jos{\'e} F. Mart{\'\i}nez and Josep
Torrellas",
title = "Architectural support for scalable speculative
parallelization in shared-memory multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "13--24",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Reinhardt:2000:TFD,
author = "Steven K. Reinhardt and Shubhendu S. Mukherjee",
title = "Transient fault detection via simultaneous
multithreading",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "25--36",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jacobson:2000:TP,
author = "Quinn Jacobson and James E. Smith",
title = "Trace preconstruction",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "37--46",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rakvic:2000:CTM,
author = "Ryan Rakvic and Bryan Black and John Paul Shen",
title = "Completion time multiple branch prediction for
enhancing trace cache performance",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "47--58",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Merten:2000:HMD,
author = "Matthew C. Merten and Andrew R. Trick and Erik M.
Nystrom and Ronald D. Barnes and Wen-mei W. Hwu",
title = "A hardware mechanism for dynamic extraction and
relayout of program hot spots",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "59--70",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oskin:2000:HCS,
author = "Mark Oskin and Frederic T. Chong and Matthew Farrens",
title = "{HLS}: combining statistical and symbolic simulation
to guide microprocessor designs",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "71--82",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brooks:2000:WFA,
author = "David Brooks and Vivek Tiwari and Margaret Martonosi",
title = "{Wattch}: a framework for architectural-level power
analysis and optimizations",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "83--94",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vijaykrishnan:2000:EDI,
author = "N. Vijaykrishnan and M. Kandemir and M. J. Irwin and
H. S. Kim and W. Ye",
title = "Energy-driven integrated hardware-software
optimizations using {SimplePower}",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "95--106",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hallnor:2000:FAS,
author = "Erik G. Hallnor and Steven K. Reinhardt",
title = "A fully associative software-managed cache design",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "107--116",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Saulsbury:2000:RBT,
author = "Ashley Saulsbury and Fredrik Dahlgren and Per
Stenstr{\"o}m",
title = "Recency-based {TLB} preloading",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "117--127",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rixner:2000:MAS,
author = "Scott Rixner and William J. Dally and Ujval J. Kapasi
and Peter Mattson and John D. Owens",
title = "Memory access scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "128--138",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lai:2000:SAT,
author = "An-Chow Lai and Babak Falsafi",
title = "Selective, accurate, and timely self-invalidation
using last-touch prediction",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "139--148",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Margolus:2000:EDA,
author = "Norman Margolus",
title = "An embedded {DRAM} architecture for large-scale
spatial-lattice computations",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "149--160",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mai:2000:SMM,
author = "Ken Mai and Tim Paaske and Nuwan Jayasena and Ron Ho
and William J. Dally and Mark Horowitz",
title = "Smart {Memories}: a modular reconfigurable
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "161--171",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zilles:2000:UBS,
author = "Craig B. Zilles and Gurindar S. Sohi",
title = "Understanding the backward slices of performance
degrading instructions",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "172--181",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lepak:2000:VLS,
author = "Kevin M. Lepak and Mikko H. Lipasti",
title = "On the value locality of store instructions",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "182--191",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cvetanovic:2000:PAA,
author = "Zarka Cvetanovic and R. E. Kessler",
title = "Performance analysis of the {Alpha 21264}-based
{Compaq ES40} system",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "192--202",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Faraboschi:2000:LTP,
author = "Paolo Faraboschi and Geoffrey Brown and Joseph A.
Fisher and Giuseppe Desoli and Fred Homewood",
title = "{Lx}: a technology platform for customizable {VLIW}
embedded processing",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "203--213",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ranganathan:2000:RCT,
author = "Parthasarathy Ranganathan and Sarita Adve and Norman
P. Jouppi",
title = "Reconfigurable caches and their application to media
processing",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "214--224",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ye:2000:CHP,
author = "Zhi Alex Ye and Andreas Moshovos and Scott Hauck and
Prithviraj Banerjee",
title = "{CHIMAERA}: a high-performance architecture with a
tightly-coupled reconfigurable functional unit",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "225--235",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Henry:2000:CWW,
author = "Dana S. Henry and Bradley C. Kuszmaul and Gabriel H.
Loh and Rahul Sami",
title = "Circuits for wide-window superscalar processors",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "236--247",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:2000:CRV,
author = "Vikas Agarwal and M. S. Hrishikesh and Stephen W.
Keckler and Doug Burger",
title = "Clock rate versus {IPC}: the end of the road for
conventional microarchitectures",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "248--259",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Smith:2000:VIS,
author = "J. E. Smith and Greg Faanes and Rabin Sugumar",
title = "Vector instruction set support for conditional
operations",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "260--269",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chou:2000:IPC,
author = "Yuan Chou and John Paul Shen",
title = "Instruction path coprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "270--281",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Barroso:2000:PSA,
author = "Luiz Andr{\'e} Barroso and Kourosh Gharachorloo and
Robert McNamara and Andreas Nowatzyk and Shaz Qadeer
and Barton Sano and Scott Smith and Robert Stets and
Ben Verghese",
title = "{Piranha}: a scalable architecture based on
single-chip multiprocessing",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "282--293",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Radhakrishnan:2000:AIE,
author = "Ramesh Radhakrishnan and Deependra Talla and Lizy
Kurian John",
title = "Allowing for {ILP} in an embedded {Java} processor",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "294--305",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bekerman:2000:ELA,
author = "Michael Bekerman and Adi Yoaz and Freddy Gabbay and
Stephan Jourdan and Maxim Kalaev and Ronny Ronen",
title = "Early load address resolution via register tracking",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "306--315",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cruz:2000:MBR,
author = "Jos{\'e}-Lorenzo Cruz and Antonio Gonz{\'a}lez and
Mateo Valero and Nigel P. Topham",
title = "Multiple-banked register file architectures",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "2",
pages = "316--325",
month = may,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:49 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fernandez:2000:EPN,
author = "Benjam{\'\i}n Sahelices Fern{\'a}ndez and Diego R.
Llanos Ferraris and Agust{\'\i}n de Dios
Hern{\'a}ndez",
title = "Exploiting parallelism in a network of workstations
using {COMA-BC}",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "3",
pages = "1--8",
month = jun,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2000:INb,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "3",
pages = "9--13",
month = jun,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lafitte:2000:RDH,
author = "Jean-Louis Lafitte",
title = "Regarding a device to help battering the {RAM} wall",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "4",
pages = "4--10",
month = sep,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:14 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Petit:2000:LSE,
author = "S. Petit and J. A. Gil and J. Sahuquillo and A. Pont",
title = "{LIDE}: a simulation environment for shared virtual
memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "4",
pages = "11--18",
month = sep,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:14 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schlosser:2000:DCS,
author = "Steven W. Schlosser and John Linwood Griffin and David
F. Nagle and Gregory R. Ganger",
title = "Designing computer systems with {MEMS}-based storage",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "1--12",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gharachorloo:2000:ADA,
author = "Kourosh Gharachorloo and Madhu Sharma and Simon Steely
and Stephen {Van Doren}",
title = "Architecture and design of {AlphaServer GS320}",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "13--24",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martin:2000:TSA,
author = "Milo M. K. Martin and Daniel J. Sorin and Anastassia
Ailamaki and Alaa R. Alameldeen and Ross M. Dickson and
Carl J. Mauer and Kevin E. Moore and Manoj Plakal and
Mark D. Hill and David A. Wood",
title = "Timestamp snooping: an approach for extending {SMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "25--36",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nanda:2000:MPR,
author = "Ashwini Nanda and Kwok-Ken Mak and Krishnan Sugavanam
and Ramendra K. Sahoo and Vijayaraghavan Soundararajan
and T. Basil Smith",
title = "{MemorIES}: a programmable, real-time hardware
emulation tool for multiprocessor server design",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "37--48",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gibson:2000:FVS,
author = "Jeff Gibson and Robert Kunz and David Ofelt and Mark
Horowitz and John Hennessy and Mark Heinrich",
title = "{FLASH} vs. {(Simulated) FLASH}: closing the
simulation loop",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "49--58",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chou:2000:UML,
author = "Andy Chou and Benjamin Chelf and Dawson Engler and
Mark Heinrich",
title = "Using meta-level compilation to check {FLASH} protocol
code",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "59--70",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhoedjang:2000:EDA,
author = "Raoul A. F. Bhoedjang and Kees Verstoep and Tim
R{\"u}hl and Henri E. Bal and Rutger F. H. Hofman",
title = "Evaluating design alternatives for reliable
communication on high-speed networks",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "71--81",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mattson:2000:CS,
author = "Peter Mattson and William J. Dally and Scott Rixner
and Ujval J. Kapasi and John D. Owens",
title = "Communication scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "82--92",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hill:2000:SAD,
author = "Jason Hill and Robert Szewczyk and Alec Woo and Seth
Hollar and David Culler and Kristofer Pister",
title = "System architecture directions for networked sensors",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "93--104",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lebeck:2000:PAP,
author = "Alvin R. Lebeck and Xiaobo Fan and Heng Zeng and Carla
Ellis",
title = "Power aware page allocation",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "105--116",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Berger:2000:HSM,
author = "Emery D. Berger and Kathryn S. McKinley and Robert D.
Blumofe and Paul R. Wilson",
title = "{Hoard}: a scalable memory allocator for multithreaded
applications",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "117--128",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Flautner:2000:TLP,
author = "Kriszti{\'a}n Flautner and Rich Uhlig and Steve
Reinhardt and Trevor Mudge",
title = "Thread-level parallelism and interactive performance
of desktop applications",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "129--138",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kawahito:2000:ENP,
author = "Motohiro Kawahito and Hideaki Komatsu and Toshio
Nakatani",
title = "Effective null pointer check elimination utilizing
hardware trap",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "139--149",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2000:FVL,
author = "Youtao Zhang and Jun Yang and Rajiv Gupta",
title = "Frequent value locality and value-centric data cache
design",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "150--159",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burrows:2000:EFV,
author = "M. Burrows and {\'U}. Erlingsson and S.-T. A. Leung and M.
T. Vandevoorde and C. A. Waldspurger and K. Walker and
W. E. Weihl",
title = "Efficient and flexible value sampling",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "160--167",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thekkath:2000:ASC,
author = "David Lie and Chandramohan Thekkath and Mark Mitchell
and Patrick Lincoln and Dan Boneh and John Mitchell and
Mark Horowitz",
title = "Architectural support for copy and tamper resistant
software",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "168--177",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burke:2000:ASF,
author = "Jerome Burke and John McDonald and Todd Austin",
title = "Architectural support for fast symmetric-key
cryptography",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "178--189",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kubiatowicz:2000:OAG,
author = "John Kubiatowicz and David Bindel and Yan Chen and
Steven Czerwinski and Patrick Eaton and Dennis Geels
and Ramakrishna Gummadi and Sean Rhea and Hakim
Weatherspoon and Westley Weimer and Chris Wells and Ben
Zhao",
title = "{OceanStore}: an architecture for global-scale
persistent storage",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "190--201",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Duesterwald:2000:SPH,
author = "Evelyn Duesterwald and Vasanth Bala",
title = "Software profiling for hot path prediction: less is
more",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "202--211",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zahir:2000:CCD,
author = "Rumi Zahir and Jonathan Ross and Dale Morris and Drew
Hess",
title = "{OS} and compiler considerations in the design of the
{IA-64} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "212--221",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Connors:2000:HSD,
author = "Daniel A. Connors and Hillery C. Hunter and Ben-Chung
Cheng and Wen-mei W. Hwu",
title = "Hardware support for dynamic activation of
compiler-directed computation reuse",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "222--233",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Snavely:2000:SJS,
author = "Allan Snavely and Dean M. Tullsen",
title = "Symbiotic job scheduling for a simultaneous
multithreaded processor",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "234--244",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Redstone:2000:AOS,
author = "Joshua A. Redstone and Susan J. Eggers and Henry M.
Levy",
title = "An analysis of operating system behavior on a
simultaneous multithreaded architecture",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "245--256",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sundaramoorthy:2000:SPI,
author = "Karthik Sundaramoorthy and Zach Purser and Eric
Rotenberg",
title = "Slipstream processors: improving both performance and
fault tolerance",
journal = j-COMP-ARCH-NEWS,
volume = "28",
number = "5",
pages = "257--268",
month = dec,
year = "2000",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Wilkes:2001:MGF,
  author          = {Maurice V. Wilkes},
  title           = {The memory gap and the future of high performance memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {2--7},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Manjikian:2001:MESa,
  author          = {Naraig Manjikian},
  title           = {Multiprocessor enhancements of the {SimpleScalar} tool set},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {8--15},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Wang:2001:MAH,
  author          = {Frank Wang},
  title           = {A modified architecture for high-density {MRAM}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {16--22},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Altman:2001:WWB,
  author          = {Erik R. Altman and David Kaeli},
  title           = {{WBT-2000}: {Workshop on Binary Translation 2000}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {23--25},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Srivastava:2001:EOB,
  author          = {Amitabh Srivastava},
  title           = {Emerging opportunities for binary tools},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {26--26},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Cain:2001:DBT,
  author          = {Harold W. Cain and Kevin M. Lepak and Mikko H. Lipasti},
  title           = {A dynamic binary translation approach to architectural simulation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {27--36},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Hilgendorf:2001:ITE,
  author          = {Rolf Hilgendorf and Wolfram Sauer},
  title           = {Instruction translation for an experimental {S/390} processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {37--42},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Ronsse:2001:JRJ,
  author          = {Michiel Ronsse and Koen {De Bosschere}},
  title           = {{JiTI}: a robust just in time instrumentation technique},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {43--54},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Ung:2001:OHP,
  author          = {David Ung and Cristina Cifuentes},
  title           = {Optimising hot paths in a dynamic binary translator},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {55--65},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Gschwind:2001:OPE,
  author          = {Michael Gschwind and Erik Altman},
  title           = {Optimization and precise exceptions in dynamic compilation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {66--74},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2001:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {1},
  pages           = {75--77},
  month           = mar,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:36 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Zilles:2001:EBP,
  author          = {Craig Zilles and Gurindar Sohi},
  title           = {Execution-based prediction using speculative slices},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {2--13},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Collins:2001:SPL,
  author          = {Jamison D. Collins and Hong Wang and Dean M. Tullsen and Christopher Hughes and Yong-Fong Lee and Dan Lavery and John P. Shen},
  title           = {Speculative precomputation: long-range prefetching of delinquent loads},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {14--25},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Balasubramonian:2001:DAP,
  author          = {Rajeev Balasubramonian and Sandhya Dwarkadas and David H. Albonesi},
  title           = {Dynamically allocating processor resources between nearby and distant {ILP}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {26--37},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Luk:2001:TML,
  author          = {Chi-Keung Luk},
  title           = {Tolerating memory latency through software-controlled pre-execution in simultaneous multithreading processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {40--51},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Annavaram:2001:DPD,
  author          = {Murali Annavaram and Jignesh M. Patel and Edward S. Davidson},
  title           = {Data prefetching by dependence graph precomputation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {52--61},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Cuppu:2001:CLS,
  author          = {Vinodh Cuppu and Bruce Jacob},
  title           = {Concurrency, latency, or system overhead: which has the largest impact on uniprocessor {DRAM}-system performance?},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {62--71},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Fields:2001:FPP,
  author          = {Brian Fields and Shai Rubin and Rastislav Bod{\'\i}k},
  title           = {Focusing processor policies via critical-path prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {74--85},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Sherwood:2001:ADF,
  author          = {Timothy Sherwood and Brad Calder},
  title           = {Automated design of finite state machine predictors for customized processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {86--97},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Wu:2001:BER,
  author          = {Youfeng Wu and Dong-Yuan Chen and Jesse Fang},
  title           = {Better exploration of region-level value locality with integrated computation reuse and value prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {98--108},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Wu:2001:CFF,
  author          = {Lisa Wu and Chris Weaver and Todd Austin},
  title           = {{CryptoManiac}: a fast flexible architecture for secure communication},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {110--119},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Yum:2001:QPC,
  author          = {Ki Hwan Yum and Eun Jung Kim and Chita R. Das},
  title           = {{QoS} provisioning in clusters: an investigation of {Router} and {NIC} design},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {120--129},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Srinivasan:2001:LVC,
  author          = {Srikanth T. Srinivasan and Roy Dz-ching Ju and Alvin R. Lebeck and Chris Wilkerson},
  title           = {Locality vs. criticality},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {132--143},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Lai:2001:DBP,
  author          = {An-Chow Lai and Cem Fide and Babak Falsafi},
  title           = {Dead-block prediction \& dead-block correlating prefetchers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {144--154},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Ramirez:2001:CLO,
  author          = {Alex Ramirez and Luiz Andr{\'e} Barroso and Kourosh Gharachorloo and Robert Cohn and Josep Larriba-Pey and P. Geoffrey Lowney and Mateo Valero},
  title           = {Code layout optimizations for transaction processing workloads},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {155--164},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Niemier:2001:EEW,
  author          = {Michael Thaddeus Niemier and Peter M. Kogge},
  title           = {Exploring and exploiting wire-level pipelining in emerging technologies},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {166--177},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Goldstein:2001:NSC,
  author          = {Seth Copen Goldstein and Mihai Budiu},
  title           = {{NanoFabrics}: spatial computing using molecular electronics},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {178--191},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lie:2001:SME,
author = "David Lie and Andy Chou and Dawson Engler and David L.
Dill",
title = "A simple method for extracting models from protocol
code",
journal = j-COMP-ARCH-NEWS,
volume = "29",
number = "2",
pages = "192--203",
month = may,
year = "2001",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:50 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Prvulovic:2001:RAB,
  author          = {Milos Prvulovic and Mar{\'\i}a Jes{\'u}s Garzar{\'a}n and Lawrence Rauchwerger and Josep Torrellas},
  title           = {Removing architectural bottlenecks to the scalability of speculative parallelization},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {204--215},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Bahar:2001:PER,
  author          = {R. Iris Bahar and Srilatha Manne},
  title           = {Power and energy reduction via pipeline balancing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {218--229},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Folegnani:2001:EEI,
  author          = {Daniele Folegnani and Antonio Gonz{\'a}lez},
  title           = {Energy-effective issue logic},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {230--239},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Kaxiras:2001:CDE,
  author          = {Stefanos Kaxiras and Zhigang Hu and Margaret Martonosi},
  title           = {Cache decay: exploiting generational behavior to reduce cache leakage power},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {240--251},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Hughes:2001:VEM,
  author          = {Christopher J. Hughes and Praful Kaul and Sarita V. Adve and Rohit Jain and Chanik Park and Jayanth Srinivasan},
  title           = {Variability in the execution of multimedia applications and implications for architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {254--265},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Sastry:2001:RPS,
  author          = {S. Subramanya Sastry and Rastislav Bod{\'\i}k and James E. Smith},
  title           = {Rapid profiling via stratified sampling},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {2},
  pages           = {278--289},
  month           = may,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:40:50 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Zilles:2001:BHC,
  author          = {Craig B. Zilles},
  title           = {Benchmark health considered harmful},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {3},
  pages           = {4--5},
  month           = jun,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Thornock:2001:NTC,
  author          = {Niki C. Thornock and J. Kelly Flanagan},
  title           = {A national trace collection and distribution resource},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {3},
  pages           = {6--10},
  month           = jun,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2001:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {3},
  pages           = {11--15},
  month           = jun,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Manjikian:2001:MESb,
  author          = {Naraig Manjikian},
  title           = {More enhancements of the {SimpleScalar} tool set},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {4},
  pages           = {5--12},
  month           = sep,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Cantin:2001:CPS,
  author          = {Jason F. Cantin and Mark D. Hill},
  title           = {Cache performance for selected {SPEC CPU2000} benchmarks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {4},
  pages           = {13--18},
  month           = sep,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Zhang:2001:PLA,
  author          = {Jinsuo Zhang},
  title           = {The predictability of load address},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {4},
  pages           = {19--28},
  month           = sep,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2001:INc,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {4},
  pages           = {29--31},
  month           = sep,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{El-Kharashi:2001:ATA,
  author          = {M. Watheq El-Kharashi and Fayez Elguibaly and Kin F. Li},
  title           = {Adapting {Tomasulo}'s algorithm for bytecode folding based {Java} processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {1--8},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Bartolini:2001:PAC,
  author          = {S. Bartolini and R. Giorgi and J. Protic and C. A. Prete and M. Valero},
  title           = {Parallel architecture and compilation techniques: selection of workshop papers, {Guest Editors}' introduction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {9--12},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@Article{Acquaviva:2001:ECE,
author = "Andrea Acquaviva and Luca Benini and Bruno Ricc{\`o}",
title = "Energy characterization of embedded real-time
operating systems",
journal = j-COMP-ARCH-NEWS,
volume = "29",
number = "5",
pages = "13--18",
month = dec,
year = "2001",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Moncusi:2001:IES,
author = "M. {\`A}ngels Moncus{\'\i} and Alex Arenas and Jes{\'u}s
Labarta",
title = "Improving energy saving in hard real time systems via
a modified dual priority scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "29",
number = "5",
pages = "19--24",
month = dec,
year = "2001",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Vahid:2001:PCP,
  author          = {Frank Vahid and Rilesh Patel and Greg Stitt},
  title           = {Propagating constants past software to hardware peripherals in fixed-application embedded systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {25--30},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Aslot:2001:PCS,
  author          = {Vishal Aslot and Rudolf Eigenmann},
  title           = {Performance characteristics of the {SPEC OMP2001} benchmarks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {31--40},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Bull:2001:MSO,
  author          = {J. Mark Bull and Darragh O'Neill},
  title           = {A microbenchmark suite for {OpenMP 2.0}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {41--48},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Nikolopoulos:2001:EMA,
  author          = {D. S. Nikolopoulos and E. Artiaga and E. Ayguad{\'e} and J. Labarta},
  title           = {Exploiting memory affinity in {OpenMP} through schedule reuse},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {49--55},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Sung:2001:MDA,
  author          = {Michael Sung and Ronny Krashinsky and Krste Asanovi{\'c}},
  title           = {Multithreading decoupled architectures for complexity-effective general purpose computing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {56--61},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Talla:2001:MDA,
  author          = {Deependra Talla and Lizy K. John},
  title           = {{MediaBreeze}: a decoupled architecture for accelerating multimedia applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {62--67},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Nakajima:2001:MCS,
  author          = {Tatsuo Nakajima},
  title           = {A middleware component supporting flexible user interaction for networked home appliances},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {68--75},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Touzet:2001:SSE,
  author          = {David Touzet and Jean-Marc Menaud and Fr{\'e}d{\'e}ric Weis and Paul Couderc and Michel Ban{\^a}tre},
  title           = {{SIDE} surfer: enriching casual meetings with spontaneous information gathering},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {76--83},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@article{Altman:2001:WBT,
  author          = {Erik R. Altman and David R. Kaeli},
  title           = {{Workshop on Binary Translation 2001}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {29},
  number          = {5},
  pages           = {84--85},
  month           = dec,
  year            = {2001},
  coden           = {CANED2},
  issn            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l          = {0163-5964},
  bibdate         = {Fri May 12 09:41:22 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-url     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2001:INd,
  author = {Mark Thorson},
  title = {{Internet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {29},
  number = {5},
  pages = {86--90},
  month = dec,
  year = {2001},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:22 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
%%% Erratum notice in volume 30, number 1 (March 2002), pages 2--4.
%%% The fourth author's given name is written with a c-cedilla accent
%%% command so that it typesets and sorts as a single accented letter.
%%% NOTE(review): spelling restored from the usual Catalan form of the
%%% name; confirm against the publisher's record.
@Article{Desikan:2002:EME,
author = "Rajagopalan Desikan and Doug Burger and Stephen W.
Keckler and Lloren{\c{c}} Cruz and Fernando Latorre and
Antonio Gonz{\'a}lez and Mateo Valero",
title = "Errata on {``Measuring Experimental Error in
Microprocessor Simulation''}",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "1",
pages = "2--4",
month = mar,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chang:2002:ATI,
  author = {Fu-Chi Chang and Chia-Jiu Wang},
  title = {Architectural tradeoff in implementing {RSA} processors},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {1},
  pages = {5--11},
  month = mar,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:36 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
%%% Volume 30, number 1 (March 2002), pages 12--14.
%%% The \slash control word is terminated with an empty group: TeX
%%% discards a space that directly follows a control word, so without
%%% the group the title would typeset as ``is /what'' with no space
%%% after the slash.
%%% NOTE(review): assumes the printed title has spaces on both sides of
%%% the slash; confirm against the published article.
@Article{Uht:2002:DEE,
author = "Augustus K. Uht",
title = "Disjoint {Eager Execution}: what it is \slash{} what it
is not",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "1",
pages = "12--14",
month = mar,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:36 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2002:INa,
  author = {Mark Thorson},
  title = {{Internet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {1},
  pages = {15--21},
  month = mar,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:36 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hartstein:2002:OPD,
  author = {A. Hartstein and Thomas R. Puzak},
  title = {The optimum pipeline depth for a microprocessor},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {7--13},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hrishikesh:2002:OLD,
  author = {M. S. Hrishikesh and Doug Burger and
    Norman P. Jouppi and Stephen W. Keckler and
    Keith I. Farkas and Premkishore Shivakumar},
  title = {The optimal logic depth per pipeline stage is 6 to 8 {FO4}
    inverter delays},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {14--24},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Sprangle:2002:IPP,
  author = {Eric Sprangle and Doug Carmean},
  title = {Increasing processor performance by implementing deeper
    pipelines},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {25--34},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Ernst:2002:EDS,
  author = {Dan Ernst and Todd Austin},
  title = {Efficient dynamic scheduling through tag elimination},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {37--46},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Fields:2002:SMP,
  author = {Brian Fields and Rastislav Bod{\'\i}k and Mark D. Hill},
  title = {{Slack}: maximizing performance under technological
    constraints},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {47--58},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Lebeck:2002:LFI,
  author = {Alvin R. Lebeck and Jinson Koppanalil and Tong Li and
    Jaidev Patwardhan and Eric Rotenberg},
  title = {A large, fast instruction window for tolerating cache
    misses},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {59--70},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Kim:2002:ISM,
  author = {Ho-Seop Kim and James E. Smith},
  title = {An instruction set and microarchitecture for instruction
    level distributed processing},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {71--81},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Vijaykumar:2002:TFR,
  author = {T. N. Vijaykumar and Irith Pomeranz and Karl Cheng},
  title = {Transient-fault recovery using simultaneous multithreading},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {87--98},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Mukherjee:2002:DDE,
  author = {Shubhendu S. Mukherjee and Michael Kontz and
    Steven K. Reinhardt},
  title = {Detailed design and evaluation of redundant multithreading
    alternatives},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {99--110},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Prvulovic:2002:RCE,
  author = {Milos Prvulovic and Zheng Zhang and Josep Torrellas},
  title = {{ReVive}: cost-effective architectural support for rollback
    recovery in shared-memory multiprocessors},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {111--122},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Sorin:2002:SIA,
  author = {Daniel J. Sorin and Milo M. K. Martin and
    Mark D. Hill and David A. Wood},
  title = {{SafetyNet}: improving the availability of shared memory
    multiprocessors with global checkpoint\slash recovery},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {123--134},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Heo:2002:DFG,
  author = {Seongmoo Heo and Kenneth Barr and Mark Hampton and
    Krste Asanovi{\'c}},
  title = {Dynamic fine-grain leakage reduction using leakage-biased
    bitlines},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {137--147},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Flautner:2002:DCS,
  author = {Kriszti{\'a}n Flautner and Nam Sung Kim and
    Steve Martin and David Blaauw and Trevor Mudge},
  title = {Drowsy caches: simple techniques for reducing leakage
    power},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {148--157},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Iyer:2002:PPE,
  author = {Anoop Iyer and Diana Marculescu},
  title = {Power and performance evaluation of globally asynchronous
    locally synchronous processors},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {158--168},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Solihin:2002:UUL,
  author = {Yan Solihin and Jaejin Lee and Josep Torrellas},
  title = {Using a user-level memory thread for correlation
    prefetching},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {171--182},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Lewis:2002:AIM,
  author = {Jarrod A. Lewis and Bryan Black and Mikko H. Lipasti},
  title = {Avoiding initialization misses to the heap},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {183--194},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Kandiraju:2002:GDT,
  author = {Gokul B. Kandiraju and Anand Sivasubramaniam},
  title = {Going the distance for {TLB} prefetching: an
    application-driven study},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {195--206},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Hu:2002:TMS,
  author = {Zhigang Hu and Stefanos Kaxiras and Margaret Martonosi},
  title = {Timekeeping in the memory system: predicting and optimizing
    memory behavior},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {209--220},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Kim:2002:IOD,
  author = {Ilhyun Kim and Mikko H. Lipasti},
  title = {Implementing optimizations at decode time},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {221--232},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Dhodapkar:2002:MMC,
  author = {Ashutosh S. Dhodapkar and James E. Smith},
  title = {Managing multi-configuration hardware via dynamic working
    set analysis},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {233--244},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Buonadonna:2002:QPI,
  author = {Philip Buonadonna and David Culler},
  title = {Queue pair {IP}: a hybrid architecture for system area
    networks},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {247--256},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Zhou:2002:EVC,
  author = {Yuanyuan Zhou and Angelos Bilas and
    Suresh Jagannathan and Cezary Dubnicki and
    James F. Philbin and Kai Li},
  title = {Experiences with {VI} communication for database storage},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {257--268},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Pajuelo:2002:SDV,
  author = {Alex Pajuelo and Antonio Gonz{\'a}lez and Mateo Valero},
  title = {Speculative dynamic vectorization},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {271--280},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Espasa:2002:TVE,
  author = {Roger Espasa and Federico Ardanaz and Joel Emer and
    Stephen Felix and Julio Gago and Roger Gramunt and
    Isaac Hernandez and Toni Juan and Geoff Lowney and
    Matthew Mattina and Andr{\'e} Seznec},
  title = {{Tarantula}: a vector extension to the {Alpha}
    architecture},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {281--292},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Seznec:2002:DTA,
  author = {Andr{\'e} Seznec and Stephen Felix and
    Venkata Krishnan and Yiannakis Sazeides},
  title = {Design tradeoffs for the {Alpha EV8} conditional branch
    predictor},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {295--306},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Chappell:2002:DPB,
  author = {Robert S. Chappell and Francis Tseng and Adi Yoaz and
    Yale N. Patt},
  title = {Difficult-path branch prediction using subordinate
    microthreads},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {307--317},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Raasch:2002:SIQ,
  author = {Steven E. Raasch and Nathan L. Binkert and
    Steven K. Reinhardt},
  title = {A scalable instruction queue design using dependence
    chains},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {2},
  pages = {318--329},
  month = may,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:40:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Steele:2002:OHH,
  author = {Ken Steele and Jason Waterman and Eugene Weinstein},
  title = {The {Oxygen H21} handheld},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {3--4},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Keen:2002:HSC,
  author = {Diana Keen and Frederic T. Chong},
  title = {Hardware-software co-design of embedded sensor-actuator
    networks},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {5--6},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Kondo:2002:SCC,
  author = {Masaaki Kondo and Motonobu Fujita and Hiroshi Nakamura},
  title = {Software-controlled on-chip memory for high-performance and
    low-power computing},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {7--8},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Sahoo:2002:SHA,
  author = {Ramendra K. Sahoo and Myung Bae and Jose Moreira},
  title = {Semi-hierarchical approach for reliability, availability,
    and serviceability of cellular systems},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {9--10},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Eberle:2002:MDC,
  author = {Hans Eberle},
  title = {Monitoring and diagnosing computer systems by radio
    communication},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {11--12},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Thies:2002:CML,
  author = {William Thies and Michal Karczmarek and
    Michael Gordon and David Maze and Jeremy Wong and
    Henry Hoffmann and Matthew Brown and Saman Amarasinghe},
  title = {A common machine language for grid-based architectures},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {13--14},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Wang:2002:NAM,
  author = {Frank Wang and Na Helian and Farhi Marir},
  title = {A novel associative memory architecture for quick
    matching},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {15--16},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Parker:2002:CUL,
  author = {Mike Parker},
  title = {A case for user-level interrupts},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {17--18},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Burtscher:2002:IIF,
  author = {Martin Burtscher},
  title = {An improved index function for {(D)FCM} predictors},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {19--24},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2002:INb,
  author = {Mark Thorson},
  title = {{Internet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {3},
  pages = {25--26},
  month = jun,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:00 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
%%% Volume 30, number 4 (September 2002), pages 4--10.
%%% The first author's surname is written with an acute accent on the
%%% o, the standard Spanish spelling; the entry previously carried a
%%% grave accent.
%%% NOTE(review): confirm the spelling against the publisher's record.
@Article{Gomez:2002:ASA,
author = "I. G{\'o}mez and L. Pi{\~n}uel and M. Prieto and F.
Tirado",
title = "Analysis of simulation-adapted {SPEC 2000}
benchmarks",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "4",
pages = "4--10",
month = sep,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:15 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2002:INc,
  author = {Mark Thorson},
  title = {{Internet} Nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {4},
  pages = {11--16},
  month = sep,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:15 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Estrin:2002:KAS,
  author = {Deborah Estrin},
  title = {Keynote address: {Sensor} network research: emerging
    challenges for architecture, systems, and languages},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {1--4},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Rajwar:2002:TLF,
  author = {Ravi Rajwar and James R. Goodman},
  title = {Transactional lock-free execution of lock-based programs},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {5--17},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Martinez:2002:SSA,
  author = {Jos{\'e} F. Mart{\'\i}nez and Josep Torrellas},
  title = {Speculative synchronization: applying thread-level
    speculation to explicitly parallel applications},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {18--29},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Lepak:2002:TSS,
  author = {Kevin M. Lepak and Mikko H. Lipasti},
  title = {Temporally silent stores},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {30--41},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Sherwood:2002:ACL,
  author = {Timothy Sherwood and Erez Perelman and Greg Hamerly and
    Brad Calder},
  title = {Automatically characterizing large scale program behavior},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {45--57},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Ogata:2002:BFO,
  author = {Kazunori Ogata and Hideaki Komatsu and Toshio Nakatani},
  title = {Bytecode fetch optimization for a {Java} interpreter},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {58--67},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Li:2002:UIO,
  author = {Tao Li and Lizy Kurian John and
    Anand Sivasubramaniam and N. Vijaykrishnan and
    Juan Rubio},
  title = {Understanding and improving operating system effects in
    control flow prediction},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {68--80},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Levis:2002:MTV,
  author = {Philip Levis and David Culler},
  title = {{Mat{\'e}}: a tiny virtual machine for sensor networks},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {85--95},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
%%% Volume 30, number 5 (December 2002), pages 96--107.
%%% The fifth author's given name is hyphenated so that BibTeX keeps
%%% it as one compound given name and styles that abbreviate given
%%% names produce a hyphenated pair of initials.
%%% NOTE(review): hyphenation follows the author's usual byline;
%%% confirm against the publisher's record.
@Article{Juang:2002:EEC,
author = "Philo Juang and Hidekazu Oki and Yong Wang and
Margaret Martonosi and Li-Shiuan Peh and Daniel
Rubenstein",
title = "Energy-efficient computing for wildlife tracking:
design tradeoffs and early experiences with
{ZebraNet}",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "96--107",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kirovski:2002:ETS,
  author = {Darko Kirovski and Milenko Drini{\'c} and
    Miodrag Potkonjak},
  title = {Enabling trusted software integrity},
  journal = j-COMP-ARCH-NEWS,
  volume = {30},
  number = {5},
  pages = {108--120},
  month = dec,
  year = {2002},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri May 12 09:41:23 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Zeng:2002:EME,
author = "Heng Zeng and Carla S. Ellis and Alvin R. Lebeck and
Amin Vahdat",
title = "{ECOSystem}: managing energy as a first class
operating system resource",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "123--132",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ashok:2002:CMC,
author = "Raksit Ashok and Saurabh Chheda and Csaba Andras
Moritz",
title = "{Cool-Mem}: combining statically speculative memory
accessing with selective address translation for energy
efficiency",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "133--143",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sasanka:2002:JLG,
author = "Ruchira Sasanka and Christopher J. Hughes and Sarita
V. Adve",
title = "Joint local and global hardware adaptations for
energy",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "144--155",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2002:DEC,
author = "Dongkeun Kim and Donald Yeung",
title = "Design and evaluation of compiler algorithms for
pre-execution",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "159--170",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhai:2002:COS,
author = "Antonia Zhai and Christopher B. Colohan and J. Gregory
Steffan and Todd C. Mowry",
title = "Compiler optimization of scalar value communication
between speculative threads",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "171--183",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oplinger:2002:ESR,
author = "Jeffrey Oplinger and Monica S. Lam",
title = "Enhancing software reliability with speculative
threads",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "184--196",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Butts:2002:DDI,
author = "J. Adam Butts and Guri Sohi",
title = "Dynamic dead-instruction detection and elimination",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "199--210",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2002:ANU,
author = "Changkyu Kim and Doug Burger and Stephen W. Keckler",
title = "An adaptive, non-uniform cache structure for
wire-delay dominated on-chip caches",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "211--222",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mukherjee:2002:CSA,
author = "Shubhendu S. Mukherjee and Federico Silla and Peter
Bannon and Joel Emer and Steve Lang and David Webb",
title = "A comparative study of arbitration algorithms for the
{Alpha 21364} pipelined router",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "223--234",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2002:IWS,
author = "Hyong-youb Kim and Vijay S. Pai and Scott Rixner",
title = "Increasing {Web} server throughput with network
interface data caching",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "239--250",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kohler:2002:PLO,
author = "Eddie Kohler and Robert Morris and Benjie Chen",
title = "Programming language optimizations for modular router
configurations",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "251--263",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sivathanu:2002:ERA,
author = "Muthian Sivathanu and Andrea C. Arpaci-Dusseau and
Remzi H. Arpaci-Dusseau",
title = "Evolving {RPC} for active storage",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "264--276",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cooksey:2002:SCD,
author = "Robert Cooksey and Stephan Jourdan and Dirk Grunwald",
title = "A stateless, content-directed data prefetching
mechanism",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "279--290",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gordon:2002:SCC,
author = "Michael I. Gordon and William Thies and Michal
Karczmarek and Jasper Lin and Ali S. Meli and Andrew A.
Lamb and Chris Leger and Jeremy Wong and Henry Hoffmann
and David Maze and Saman Amarasinghe",
title = "A stream compiler for communication-exposed
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "291--303",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Witchel:2002:MMP,
author = "Emmett Witchel and Josh Cates and Krste Asanovi{\'c}",
title = "{Mondrian} memory protection",
journal = j-COMP-ARCH-NEWS,
volume = "30",
number = "5",
pages = "304--316",
month = dec,
year = "2002",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:23 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dennis:2003:FBM,
author = "Jack B. Dennis",
title = "Fresh {Breeze}: a multiprocessor chip architecture
guided by modular programming principles",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "7--15",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Morano:2003:RHI,
author = "D. Morano and A. Khalafi and D. R. Kaeli and A. K.
Uht",
title = "Realizing high {IPC} through a scalable memory-latency
tolerant multipath microarchitecture",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "16--25",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Almasi:2003:DCD,
author = "George Alm{\'a}si and C{\u{a}}lin Ca{\c{s}}caval and
Jos{\'e} G. Casta{\~n}os and Monty Denneau and Derek
Lieber and Jos{\'e} E. Moreira and Henry S. {Warren,
Jr.}",
title = "Dissecting {Cyclops}: a detailed analysis of a
multithreaded architecture",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "26--38",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zahran:2003:CMH,
author = "Mohamed M. Zahran",
title = "On cache memory hierarchy for {Chip-Multiprocessor}",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "39--48",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Grewal:2003:EAC,
author = "Gary Gr{\'e}wal and Tom Wilson and Andrew Morton",
title = "An {EGA} approach to the compile-time assignment of
data to multiple memories in digital-signal
processors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "49--59",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ramacher:2003:GVP,
author = "Ulrich Ramacher and Nico Br{\"u}ls and Ulrich Hachmann
and Jens Harnisch and Wolfgang Raab and Axel Techmer",
title = "{100 GOPS} vision processor for automotive
applications",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "60--68",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pitsianis:2003:IVM,
author = "Nikos P. Pitsianis and Gerald G. Pechanek",
title = "Indirect {VLIW} memory allocation for the {ManArray}
multiprocessor {DSP}",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "69--74",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shimizu:2003:TLS,
author = "Naohiko Shimizu and Ken Takatori",
title = "A transparent {Linux} super page kernel for {Alpha},
{Sparc64} and {IA32}: reducing {TLB} misses of
applications",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "75--84",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bechini:2003:FGD,
author = "Alessio Bechini and Pierfrancesco Foglia and Cosimo
Antonio Prete",
title = "Fine-grain design space exploration for a cartographic
{SoC} multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "85--92",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2003:INa,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "1",
pages = "93--96",
month = mar,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:37 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Skadron:2003:TAM,
author = "Kevin Skadron and Mircea R. Stan and Wei Huang and
Sivakumar Velusamy and Karthik Sankaranarayanan and
David Tarjan",
title = "Temperature-aware microarchitecture",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "2--13",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Magklis:2003:PBD,
author = "Grigorios Magklis and Michael L. Scott and Greg
Semeraro and David H. Albonesi and Steven Dropsho",
title = "Profile-based dynamic voltage and frequency scaling
for a multiple clock domain microprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "14--27",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2003:HPA,
author = "Ilhyun Kim and Mikko H. Lipasti",
title = "Half-price architecture",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "28--38",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Park:2003:IMP,
author = "Il Park and Babak Falsafi and T. N. Vijaykumar",
title = "Implicitly-multithreaded processors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "39--51",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Citron:2003:MPM,
author = "Daniel Citron",
title = "{MisSPECulation}: partial and misleading use of {SPEC
CPU2000} in computer architecture conferences",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "52--61",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tseng:2003:BMR,
author = "Jessica H. Tseng and Krste Asanovi{\'c}",
title = "Banked multiported register files for high-frequency
superscalar microprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "62--71",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Powell:2003:PDM,
author = "Michael D. Powell and T. N. Vijaykumar",
title = "Pipeline damping: a microarchitectural technique to
reduce inductive noise in supply voltage",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "72--83",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wunderlich:2003:SAM,
author = "Roland E. Wunderlich and Thomas F. Wenisch and Babak
Falsafi and James C. Hoe",
title = "{SMARTS}: accelerating microarchitecture simulation
via rigorous statistical sampling",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "84--97",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gomaa:2003:TFR,
author = "Mohamed Gomaa and Chad Scarbrough and T. N. Vijaykumar
and Irith Pomeranz",
title = "Transient-fault recovery for chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "98--109",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Prvulovic:2003:RUT,
author = "Milos Prvulovic and Josep Torrellas",
title = "{ReEnact}: using thread-level speculation mechanisms
to debug data races in multithreaded codes",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "110--121",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Xu:2003:FDR,
author = "Min Xu and Rastislav Bodik and Mark D. Hill",
title = "A ``flight data recorder'' for enabling full-system
multiprocessor deterministic replay",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "122--135",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2003:HCC,
author = "Chuanjun Zhang and Frank Vahid and Walid Najjar",
title = "A highly configurable cache architecture for embedded
systems",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "136--146",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Buyuktosunoglu:2003:EEC,
author = "Alper Buyuktosuno{\u{g}}lu and Tejas Karkhanis and
David H. Albonesi and Pradip Bose",
title = "Energy efficient co-adaptive instruction fetch and
issue",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "147--156",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Huang:2003:PAP,
author = "Michael C. Huang and Jose Renau and Josep Torrellas",
title = "Positional adaptation of processors: application to
energy reduction",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "157--168",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gurumurthi:2003:DDS,
author = "Sudhanva Gurumurthi and Anand Sivasubramaniam and
Mahmut Kandemir and Hubertus Franke",
title = "{DRPM}: dynamic speed control for power management in
server class disks",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "169--181",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martin:2003:TCD,
author = "Milo M. K. Martin and Mark D. Hill and David A. Wood",
title = "Token coherence: decoupling performance and
correctness",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "182--193",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singh:2003:GLB,
author = "Arjun Singh and William J. Dally and Amit K. Gupta and
Brian Towles",
title = "{GOAL}: a load-balanced adaptive routing algorithm for
torus networks",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "194--205",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martin:2003:UDS,
author = "Milo M. K. Martin and Pacia J. Harper and Daniel J.
Sorin and Mark D. Hill and David A. Wood",
title = "Using destination-set prediction to improve the
latency\slash bandwidth tradeoff in shared-memory
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "206--217",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cvetanovic:2003:PAA,
author = "Zarka Cvetanovic",
title = "Performance analysis of the {Alpha 21364}-based {HP
GS1280} multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "218--229",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oberoi:2003:PFE,
author = "Paramjit S. Oberoi and Gurindar S. Sohi",
title = "Parallelism in the front-end",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "230--240",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seznec:2003:EAP,
author = "Andr{\'e} Seznec and Antony Fraboulet",
title = "Effective ahead pipelining of instruction block
address generation",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "241--252",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ernst:2003:CBF,
author = "Dan Ernst and Andrew Hamel and Todd Austin",
title = "{Cyclone}: a broadcast-free dynamic instruction
scheduler with selective replay",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "253--263",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhargava:2003:IDC,
author = "Ravi Bhargava and Lizy K. John",
title = "Improving dynamic cluster assignment for clustered
trace cache processors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "264--274",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Balasubramonian:2003:DMC,
author = "Rajeev Balasubramonian and Sandhya Dwarkadas and David
H. Albonesi",
title = "Dynamically managing the communication-parallelism
trade-off in future clustered processors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "275--287",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sherwood:2003:PMA,
author = "Timothy Sherwood and George Varghese and Brad Calder",
title = "A pipelined memory architecture for high throughput
network processors",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "288--299",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hasan:2003:EUM,
author = "Jahangir Hasan and Satish Chandra and T. N.
Vijaykumar",
title = "Efficient use of memory bandwidth to improve network
processor throughput",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "300--313",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomas:2003:IBP,
author = "Renju Thomas and Manoj Franklin and Chris Wilkerson
and Jared Stark",
title = "Improving branch prediction by dynamic dataflow-based
identification of correlated branches from a large
global history",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "314--323",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhou:2003:DGS,
author = "Huiyang Zhou and Jill Flanagan and Thomas M. Conte",
title = "Detecting global stride locality in value streams",
journal = j-COMP-ARCH-NEWS,
volume = "31",
number = "2",
pages = "324--335",
month = may,
year = "2003",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:51 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Sherwood:2003:PTP,
  author          = {Timothy Sherwood and Suleyman Sair and Brad Calder},
  title           = {Phase tracking and prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {336--349},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Anantaraman:2003:VSA,
  author          = {Aravindh Anantaraman and Kiran Seth and Kaustubh Patil and
                     Eric Rotenberg and Frank Mueller},
  title           = {Virtual simple architecture {(VISA)}: exceeding the complexity limit
                     in safe real-time systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {350--361},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Corliss:2003:DPM,
  author          = {Marc L. Corliss and E. Christopher Lewis and Amir Roth},
  title           = {{DISE}: a programmable macro engine for customizing applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {362--373},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Oskin:2003:BQW,
  author          = {Mark Oskin and Frederic T. Chong and Isaac L. Chuang and
                     John Kubiatowicz},
  title           = {Building quantum wires: the long and the short of it},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {374--387},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Wang:2003:GRP,
  author          = {Zhenlin Wang and Doug Burger and Kathryn S. McKinley and
                     Steven K. Reinhardt and Charles C. Weems},
  title           = {Guided region prefetching: a cooperative hardware\slash software
                     approach},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {388--398},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Kozyrakis:2003:OLC,
  author          = {Christos Kozyrakis and David Patterson},
  title           = {Overcoming the limitations of conventional vector processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {399--409},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Suh:2003:PAP,
  author          = {Jinwoo Suh and Eun-Gyu Kim and Stephen P. Crago and
                     Lakshmi Srinivasan and Matthew C. French},
  title           = {A performance analysis of {PIM}, stream processing, and tiled
                     processing on memory-intensive signal processing kernels},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {410--421},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Sankaralingam:2003:EIT,
  author          = {Karthikeyan Sankaralingam and Ramadass Nagarajan and Haiming Liu and
                     Changkyu Kim and Jaehyuk Huh and Doug Burger and
                     Stephen W. Keckler and Charles R. Moore},
  title           = {Exploiting {ILP}, {TLP}, and {DLP} with the polymorphous {TRIPS}
                     architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {422--433},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Chen:2003:JSD,
  author          = {Michael K. Chen and Kunle Olukotun},
  title           = {The {Jrpm} system for dynamically parallelizing {Java} programs},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {2},
  pages           = {434--446},
  month           = may,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Fong:2003:CAA,
  author          = {Anthony S. Fong},
  title           = {A computer architecture with access control and cache option tags on
                     individual instruction operands},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {1--5},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Tan:2003:DAP,
  author          = {Edwin J. Tan and Wendi B. Heinzelman},
  title           = {{DSP} architectures: past, present and futures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {6--19},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Vintan:2003:ABP,
  author          = {Lucian N. Vintan and Marius Sbera and Ioan Z. Mihu and Adrian Florea},
  title           = {An alternative to branch prediction: pre-computed branches},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {20--29},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Heinrich:2003:OWA,
  author          = {Mark Heinrich and Mainak Chaudhuri},
  title           = {Ocean warning: avoid drowning},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {30--32},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Lafitte:2003:QMC,
  author          = {Jean-Louis Lafitte},
  title           = {Qualitatively matching computer architecture with {Turing} machine},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {33--41},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Koushiro:2003:TLV,
  author          = {Takenori Koushiro and Toshinori Sato and Itsujiro Arita},
  title           = {A trace-level value predictor for {Contrail} processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {42--47},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2003:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {3},
  pages           = {48--54},
  month           = jun,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:00 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorup:2003:CPM,
  author          = {Mikkel Thorup},
  title           = {Combinatorial power in multimedia processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {4},
  pages           = {5--11},
  month           = sep,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Hau:2003:SJA,
  author          = {Gary K. W. Hau and Anthony Fong and Mok Pak Lun},
  title           = {Support of {Java API} for the {jHISC} system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {4},
  pages           = {12--17},
  month           = sep,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Lun:2003:MMO,
  author          = {Mok Pak Lun and Richard Li and Anthony Fong},
  title           = {Method manipulation in an object-oriented processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {4},
  pages           = {18--25},
  month           = sep,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2003:INc,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {4},
  pages           = {26--32},
  month           = sep,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:15 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Breen:2003:AAA,
  author          = {Kristopher C. Breen and Duncan G. Elliott},
  title           = {Aliasing and anti-aliasing in branch history table prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {5},
  pages           = {1--4},
  month           = dec,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:23 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Yu:2003:TBS,
  author          = {Ryan W. S. Yu and Gary K. W. Hau and Anthony S. Fong},
  title           = {Test bench for software development of object-oriented processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {5},
  pages           = {5--9},
  month           = dec,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:23 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Lun:2003:OOP,
  author          = {Mok Pak Lun and Anthony Fong and Gary K. W. Hau},
  title           = {Object-oriented processor requirements with instruction analysis of
                     {Java} programs},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {5},
  pages           = {10--15},
  month           = dec,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:23 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2003:INd,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {31},
  number          = {5},
  pages           = {16--21},
  month           = dec,
  year            = {2003},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:41:23 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{John:2004:MFS,
  author          = {Lizy Kurian John},
  title           = {More on finding a single number to indicate overall performance of a
                     benchmark suite},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {1},
  pages           = {3--8},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2004:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {1},
  pages           = {9--13},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Taylor:2004:ERM,
author = "Michael Bedford Taylor and Walter Lee and Jason Miller
and David Wentzlaff and Ian Bratt and Ben Greenwald and
Henry Hoffmann and Paul Johnson and Jason Kim and James
Psota and Arvind Saraf and Nathan Shnidman and Volker
Strumpen and Matt Frank and Saman Amarasinghe and Anant
Agarwal",
title = "Evaluation of the {Raw} Microprocessor: An
Exposed-Wire-Delay Architecture for {ILP} and
Streams",
journal = j-COMP-ARCH-NEWS,
volume = "32",
number = "2",
pages = "2--2",
month = mar,
year = "2004",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Anonymous:2004:GCC,
  author          = {Anonymous},
  title           = {General {Co-Chair}'s Message},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {9--9},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Anonymous:2004:PCM,
  author          = {Anonymous},
  title           = {Program {Chair}'s Message},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {10--10},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Anonymous:2004:C,
  author          = {Anonymous},
  title           = {Committees},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {11--11},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Anonymous:2004:Ra,
  author          = {Anonymous},
  title           = {Reviewers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {13--13},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Ahn:2004:EIS,
  author          = {Jung Ho Ahn and William J. Dally and Brucek Khailany and
                     Ujval J. Kapasi and Abhishek Das},
  title           = {Evaluating the {Imagine Stream Architecture}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {14--14},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Sias:2004:FTI,
  author          = {John W. Sias and Sain-zee Ueng and Geoff A. Kent and
                     Ian M. Steiner and Erik M. Nystrom and Wen-mei W. Hwu},
  title           = {Field-testing {IMPACT EPIC} research results in {Itanium 2}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {26--26},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Vijaykumar:2004:WDP,
  author          = {T. N. Vijaykumar and Zeshan Chishti},
  title           = {Wire Delay is Not a Problem for {SMT} (In the Near Future)},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {40--40},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Krashinsky:2004:VTA,
  author          = {Ronny Krashinsky and Christopher Batten and Mark Hampton and
                     Steve Gerding and Brian Pharris and Jared Casper and
                     Krste Asanovic},
  title           = {The Vector-Thread Architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {52--52},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Kumar:2004:SIH,
  author          = {Rakesh Kumar and Dean M. Tullsen and Parthasarathy Ranganathan and
                     Norman P. Jouppi and Keith I. Farkas},
  title           = {Single-{ISA} Heterogeneous Multi-Core Architectures for Multithreaded
                     Workload Performance},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {64--64},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Chou:2004:MOE,
  author          = {Yuan Chou and Brian Fahs and Santosh Abraham},
  title           = {Microarchitecture Optimizations for Exploiting Memory-Level
                     Parallelism},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {76--76},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Cain:2004:MOV,
  author          = {Harold W. Cain and Mikko H. Lipasti},
  title           = {Memory Ordering: a Value-Based Approach},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {90--90},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Hammond:2004:TMC,
  author          = {Lance Hammond and Vicky Wong and Mike Chen and
                     Brian D. Carlstrom and John D. Davis and Ben Hertzberg and
                     Manohar K. Prabhu and Honggo Wijaya and Christos Kozyrakis and
                     Kunle Olukotun},
  title           = {Transactional Memory Coherence and Consistency},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {102--102},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Hangal:2004:TPV,
  author          = {Sudheendra Hangal and Durgam Vahia and Chaiyasit Manovit and
                     Juin-Yeu Joseph Lu},
  title           = {{TSOtool}: a Program for Verifying Memory Systems Using the Memory
                     Consistency Model},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {114--114},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chaudhuri:2004:SAN,
author = "Mainak Chaudhuri and Mark Heinrich",
title = "{SMTp}: An Architecture for Next-generation
Scalable Multi-threading",
journal = j-COMP-ARCH-NEWS,
volume = "32",
number = "2",
pages = "124--124",
month = mar,
year = "2004",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hughes:2004:FAF,
author = "Christopher J. Hughes and Sarita V. Adve",
title = "A Formal Approach to Frequent Energy Adaptations
for Multimedia Applications",
journal = j-COMP-ARCH-NEWS,
volume = "32",
number = "2",
pages = "138--138",
month = mar,
year = "2004",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Oliver:2004:SMC,
  author          = {John Oliver and Ravishankar Rao and Paul Sultana and
                     Jedidiah Crandall and Erik Czernikowski and
                     Leslie W. {Jones IV} and Diana Franklin and Venkatesh Akella and
                     Frederic T. Chong},
  title           = {{Synchroscalar}: a Multiple Clock Domain, Power-Aware, Tile-Based
                     Embedded Processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {150--150},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Rosner:2004:PAT,
  author          = {Roni Rosner and Yoav Almog and Micha Moffie and Naftali Schwartz and
                     Avi Mendelson},
  title           = {Power Awareness through Selective Dynamically Optimized Traces},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {162--162},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Bairavasundaram:2004:XRN,
  author          = {Lakshmi N. Bairavasundaram and Muthian Sivathanu and
                     Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau},
  title           = {{X-RAY}: a Non-Invasive Exclusive Caching Mechanism for {RAIDs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {176--176},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Mullins:2004:LLV,
  author          = {Robert Mullins and Andrew West and Simon Moore},
  title           = {Low-Latency Virtual-Channel Routers for On-Chip Networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {188--188},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Puente:2004:ICR,
  author          = {V. Puente and J. A. Gregorio and F. Vallejo and R. Beivide},
  title           = {{Immunet}: a Cheap and Robust Fault-Tolerant Packet Routing
                     Mechanism},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {198--198},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Alameldeen:2004:ACC,
  author          = {Alaa R. Alameldeen and David A. Wood},
  title           = {Adaptive Cache Compression for High-Performance Processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {212--212},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Zhou:2004:IEA,
  author          = {Pin Zhou and Feng Qin and Wei Liu and Yuanyuan Zhou and
                     Josep Torrellas},
  title           = {{iWatcher}: Efficient Architectural Support for Software Debugging},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {224--224},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Yehia:2004:SDI,
  author          = {Sami Yehia and Olivier Temam},
  title           = {From Sequences of Dependent Instructions to Functions: An Approach
                     for Improving Performance without {ILP} or Speculation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {238--238},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Falcon:2004:PCH,
  author          = {Ayose Falcon and Jared Stark and Alex Ramirez and Konrad Lai and
                     Mateo Valero},
  title           = {Prophet\slash Critic Hybrid Branch Prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {250--250},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Weaver:2004:TRS,
  author          = {Christopher Weaver and Joel Emer and Shubhendu S. Mukherjee and
                     Steven K. Reinhardt},
  title           = {Techniques to Reduce the Soft Error Rate of a High-Performance
                     Microprocessor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {264--264},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Srinivasan:2004:CLR,
  author          = {Jayanth Srinivasan and Sarita V. Adve and Pradip Bose and
                     Jude A. Rivers},
  title           = {The Case for Lifetime Reliability-Aware Microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {276--276},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@article{Powell:2004:ERB,
  author          = {Michael D. Powell and T. N. Vijaykumar},
  title           = {Exploiting Resonant Behavior to Reduce Inductive Noise},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {32},
  number          = {2},
  pages           = {288--288},
  month           = mar,
  year            = {2004},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Butts:2004:UBR,
  author          = {Butts, J. Adam and Sohi, Gurindar S.},
  title           = {Use-Based Register Caching with Decoupled Indexing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {302--302},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the lead author's given name had been recorded as a
%%% repeat of the surname; it is restored here as Ruben Gonzalez (with
%%% accents) -- confirm against the publisher's record.  The citation
%%% key is deliberately left unchanged so existing citations still resolve.
@Article{Gonzalez:2004:CAI,
author = "Rub{\'e}n Gonz{\'a}lez and Adri{\'a}n Cristal and Daniel
Ortega and Alexander Veidenbaum and Mateo Valero",
title = "A Content Aware Integer Register File Organization",
journal = j-COMP-ARCH-NEWS,
volume = "32",
number = "2",
pages = "314--314",
month = mar,
year = "2004",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:40:45 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lipasti:2004:PRI,
  author          = {Lipasti, Mikko H. and Mestan, Brian R. and
                     Gunadi, Erika},
  title           = {Physical Register Inlining},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {325--325},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Karkhanis:2004:FOS,
  author          = {Karkhanis, Tejas S. and Smith, James E.},
  title           = {A First-Order Superscalar Processor Model},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {338--338},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Eeckhout:2004:CFM,
  author          = {Eeckhout, Lieven and {Bell Jr.}, Robert H. and
                     Stougie, Bastiaan and {De Bosschere}, Koen and
                     John, Lizy K.},
  title           = {Control Flow Modeling in Statistical Simulation for
                     Accurate and Efficient Processor Design Studies},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {350--350},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Iyer:2004:ESI,
  author          = {Iyer, Bharath and Srinivasan, Sadagopan and
                     Jacob, Bruce},
  title           = {Extended Split-Issue: Enabling Flexibility in the
                     Hardware Implementation of {NUAL VLIW DSPs}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {364--364},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Parashar:2004:CEA,
  author          = {Parashar, Angshuman and Gurumurthi, Sudhanva and
                     Sivasubramaniam, Anand},
  title           = {A Complexity-Effective Approach to {ALU} Bandwidth
                     Enhancement for Instruction-Level Temporal
                     Redundancy},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {376--376},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Anonymous:2004:AI,
  author          = {Anonymous},
  title           = {Author Index},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = mar,
  volume          = {32},
  number          = {2},
  pages           = {387--387},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:40:45 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Cristal:2004:CRC,
  author          = {Cristal, Adri{\'a}n and Mart{\'\i}nez, Jos{\'e} F. and
                     Llosa, Josep and Valero, Mateo},
  title           = {A case for resource-conscious out-of-order
                     processors: towards kilo-instruction in-flight
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {3--10},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Kundu:2004:CSI,
  author          = {Kundu, Partha and Annavaram, Murali and Diep, Trung
                     and Shen, John},
  title           = {A case for shared instruction cache on chip
                     multiprocessors running {OLTP}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {11--18},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): "Waran Research Foundation" is an organization, not a
%%% person.  It is brace-protected below so that BibTeX treats it as one
%%% indivisible name instead of parsing "Foundation" as a surname.
%%% It may be the authors' affiliation rather than an author -- TODO
%%% confirm against the publisher's record before removing it.
@Article{Venkateswaran:2004:MPN,
author = "N. Venkateswaran and {Waran Research Foundation} and
Aditya Krishnan and S. Niranjan Kumar and Arrvindh
Shriraman and Srinivas Sridharan",
title = "Memory in processor: a novel design paradigm for
supercomputing architectures",
journal = j-COMP-ARCH-NEWS,
volume = "32",
number = "3",
pages = "19--26",
month = jun,
year = "2004",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:01 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Branovic:2004:WCE,
  author          = {Branovic, I. and Giorgi, R. and Martinelli, E.},
  title           = {A workload characterization of elliptic curve
                     cryptography methods in embedded environments},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {27--34},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Brifault:2004:DCM,
  author          = {Brifault, K. and Charles, H. P.},
  title           = {Data cache management on {EPIC} architecture:
                     optimizing memory access for image processing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {35--42},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Shimizu:2004:JOL,
  author          = {Shimizu, Naohiko and Kon, Chiaki},
  title           = {{Java} object look aside buffer for embedded
                     applications},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {43--49},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Sakanaka:2004:LER,
  author          = {Sakanaka, Akihito and Fujii, Seiichirou and
                     Sato, Toshinori},
  title           = {A leakage-energy-reduction technique for
                     highly-associative caches in embedded systems},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {50--54},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Moch:2004:HSM,
  author          = {Moch, S. and Berekovi{\'c}, M. and Stolberg, H. J.
                     and Friebe, L. and Kulaczewski, M. B. and
                     Dehnhardt, A. and Pirsch, P.},
  title           = {{HIBRID-SOC}: a multi-core architecture for image
                     and video applications},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {55--61},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Berekovic:2004:SCS,
  author          = {Berekovic, Mladen and Moch, S{\"o}ren and
                     Pirsch, Peter},
  title           = {A scalable, clustered {SMT} processor for digital
                     signal processing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {62--69},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Bartolini:2004:PIS,
  author          = {Bartolini, S. and Prete, C. A.},
  title           = {A proposal for input-sensitivity analysis of
                     profile-driven optimizations on embedded
                     applications},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {70--77},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Thorson:2004:INb,
  author          = {Thorson, Mark},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = jun,
  volume          = {32},
  number          = {3},
  pages           = {78--83},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:01 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Mashey:2004:WBM,
  author          = {Mashey, John R.},
  title           = {War of the benchmark means: time for a truce},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = sep,
  volume          = {32},
  number          = {4},
  pages           = {1--14},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Lafitte:2004:YLL,
  author          = {Lafitte, Jean-Louis},
  title           = {40 years later \ldots{} a new engine to handle an
                     operating system infrastructure},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = sep,
  volume          = {32},
  number          = {4},
  pages           = {15--22},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Thorson:2004:INc,
  author          = {Thorson, Mark},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = sep,
  volume          = {32},
  number          = {4},
  pages           = {23--41},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:16 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Hammond:2004:PTC,
  author          = {Hammond, Lance and Carlstrom, Brian D. and
                     Wong, Vicky and Hertzberg, Ben and Chen, Mike and
                     Kozyrakis, Christos and Olukotun, Kunle},
  title           = {Programming with transactional coherence and
                     consistency {(TCC)}},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {1--13},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Budiu:2004:SC,
  author          = {Budiu, Mihai and Venkataramani, Girish and
                     Chelcea, Tiberiu and Goldstein, Seth Copen},
  title           = {Spatial computation},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {14--26},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Ekanayake:2004:ULP,
  author          = {Ekanayake, Virantha and {Kelly IV}, Clinton and
                     Manohar, Rajit},
  title           = {An ultra low-power processor for sensor networks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {27--36},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Lumb:2004:DSD,
  author          = {Lumb, Christopher R. and Golding, Richard},
  title           = {{D-SPTF}: decentralized request distribution in
                     brick-based storage systems},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {37--47},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Saito:2004:FBD,
  author          = {Saito, Yasushi and Fr{\o}lund, Svend and
                     Veitch, Alistair and Merchant, Arif and
                     Spence, Susan},
  title           = {{FAB}: building distributed enterprise disk arrays
                     from commodity components},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {48--58},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Denehy:2004:DSA,
  author          = {Denehy, Timothy E. and Bent, John and
                     Popovici, Florentina I. and
                     Arpaci-Dusseau, Andrea C. and
                     Arpaci-Dusseau, Remzi H.},
  title           = {Deconstructing storage arrays},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {59--71},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Zhuang:2004:HIE,
  author          = {Zhuang, Xiaotong and Zhang, Tao and Pande, Santosh},
  title           = {{HIDE}: an infrastructure for efficiently protecting
                     information leakage on the address bus},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {72--84},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Suh:2004:SPE,
  author          = {Suh, G. Edward and Lee, Jae W. and Zhang, David and
                     Devadas, Srinivas},
  title           = {Secure program execution via dynamic information
                     flow tracking},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {85--96},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Huh:2004:CDM,
  author          = {Huh, Jaehyuk and Chang, Jichuan and Burger, Doug and
                     Sohi, Gurindar S.},
  title           = {Coherence decoupling: making use of incoherence},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {97--106},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Srinivasan:2004:CFP,
  author          = {Srinivasan, Srikanth T. and Rajwar, Ravi and
                     Akkary, Haitham and Gandhi, Amit and Upton, Mike},
  title           = {Continual flow pipelines},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {107--119},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Desikan:2004:SSR,
  author          = {Desikan, Rajagopalan and Sethumadhavan, Simha and
                     Burger, Doug and Keckler, Stephen W.},
  title           = {Scalable selective re-execution for {EDGE}
                     architectures},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {120--132},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Regehr:2004:HSA,
  author          = {Regehr, John and Reid, Alastair},
  title           = {{HOIST}: a system for automatically deriving static
                     analyzers for embedded systems},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {133--143},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Wang:2004:HTV,
  author          = {Wang, Perry H. and Collins, Jamison D. and
                     Wang, Hong and Kim, Dongkeun and Greene, Bill and
                     Chan, Kai-Ming and Yunus, Aamir B. and
                     Sych, Terry and Moore, Stephen F. and
                     Shen, John P.},
  title           = {Helper threads via virtual multithreading on an
                     experimental {Itanium-2} processor-based platform},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {144--155},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Hauswirth:2004:LOM,
  author          = {Hauswirth, Matthias and Chilimbi, Trishul M.},
  title           = {Low-overhead memory leak detection using adaptive
                     statistical profiling},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {156--164},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Shen:2004:LPP,
  author          = {Shen, Xipeng and Zhong, Yutao and Ding, Chen},
  title           = {Locality phase prediction},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {165--176},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Zhou:2004:DTP,
  author          = {Zhou, Pin and Pandey, Vivek and
                     Sundaresan, Jagadeesan and Raghuraman, Anand and
                     Zhou, Yuanyuan and Kumar, Sanjeev},
  title           = {Dynamic tracking of page miss ratio curve for memory
                     management},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {177--188},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Rabbah:2004:COP,
  author          = {Rabbah, Rodric M. and Sandanagobalane, Hariharan and
                     Ekpanyapong, Mongkol and Wong, Weng-Fai},
  title           = {Compiler orchestrated prefetching via speculation
                     and predication},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {189--198},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Cher:2004:SPM,
  author          = {Cher, Chen-Yong and Hosking, Antony L. and
                     Vijaykumar, T. N.},
  title           = {Software prefetching for mark-sweep garbage
                     collection: hardware analysis and software
                     redesign},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {199--210},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Lowell:2004:DVM,
  author          = {Lowell, David E. and Saito, Yasushi and
                     Samberg, Eileen J.},
  title           = {Devirtualizable virtual machines enabling general,
                     single-node, online maintenance},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {211--223},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Smolens:2004:FBS,
  author          = {Smolens, Jared C. and Gold, Brian T. and
                     Kim, Jangwoo and Falsafi, Babak and
                     Hoe, James C. and Nowatzyk, Andreas G.},
  title           = {Fingerprinting: bounding soft-error detection
                     latency and bandwidth},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {224--234},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Bronevetsky:2004:ALC,
  author          = {Bronevetsky, Greg and Marques, Daniel and
                     Pingali, Keshav and Szwed, Peter and
                     Schulz, Martin},
  title           = {Application-level checkpointing for shared memory
                     programs},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {235--247},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Wu:2004:FOM,
  author          = {Wu, Qiang and Juang, Philo and Martonosi, Margaret
                     and Clark, Douglas W.},
  title           = {Formal online methods for voltage\slash frequency
                     control in multiple clock domain microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {248--259},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Gomaa:2004:HRL,
  author          = {Gomaa, Mohamed and Powell, Michael D. and
                     Vijaykumar, T. N.},
  title           = {Heat-and-run: leveraging {SMT} and {CMP} to manage
                     power density through the operating system},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {260--270},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Li:2004:PDE,
  author          = {Li, Xiaodong and Li, Zhenmin and David, Francis and
                     Zhou, Pin and Zhou, Yuanyuan and Adve, Sarita and
                     Kumar, Sanjeev},
  title           = {Performance directed energy management for main
                     memory and disks},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2004},
  month           = dec,
  volume          = {32},
  number          = {5},
  pages           = {271--283},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  bibdate         = {Fri May 12 09:41:24 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Chess:2005:SAC,
  author          = {Chess, David M.},
  title           = {Security in autonomic computing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {2--5},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Shi:2005:TIA,
  author          = {Shi, Weidong and Lee, Hsien-Hsin S. and
                     Lu, Chenghuai and Ghosh, Mrinmoy},
  title           = {Towards the issues in architectural support for
                     protection of software execution},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {6--15},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{McGregor:2005:PCK,
  author          = {McGregor, John P. and Lee, Ruby B.},
  title           = {Protecting cryptographic keys and computations via
                     virtual secure coprocessing},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {16--26},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Rogers:2005:MPH,
  author          = {Rogers, Brian and Solihin, Yan and Prvulovic, Milos},
  title           = {Memory predecryption: hiding the latency overhead of
                     memory encryption},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {27--33},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Holland:2005:ADK,
  author          = {Holland, David A. and Lim, Ada T. and
                     Seltzer, Margo I.},
  title           = {An architecture a day keeps the hacker away},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {34--41},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Sidiroglou:2005:HSS,
  author          = {Sidiroglou, Stelios and Locasto, Michael E. and
                     Keromytis, Angelos D.},
  title           = {Hardware support for self-healing software services},
  journal         = j-COMP-ARCH-NEWS,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  year            = {2005},
  month           = mar,
  volume          = {33},
  number          = {1},
  pages           = {42--47},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  CODEN           = {CANED2},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and
                     Anti-Virus (WASSA)},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
}
@Article{Crandall:2005:SAM,
  author          = {Jedidiah R. Crandall and Frederic T. Chong},
  title           = {A security assessment of the {Minos} architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {48--57},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Burnside:2005:CCP,
  author          = {Matthew Burnside and Angelos D. Keromytis},
  title           = {The case for crypto protocol awareness inside the {OS} kernel},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {58--64},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Corliss:2005:UDP,
  author          = {Marc L. Corliss and E. Christopher Lewis and Amir Roth},
  title           = {Using {DISE} to protect return addresses from attack},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {65--72},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Ye:2005:RRA,
  author          = {Dong Ye and David Kaeli},
  title           = {A reliable return address stack: microarchitectural features to defeat stack smashing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {73--80},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Inoue:2005:EST,
  author          = {Koji Inoue},
  title           = {Energy-security tradeoff in a secure cache architecture against buffer overflow attacks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {81--89},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Uluski:2005:CAW,
  author          = {Derek Uluski and Micha Moffie and David Kaeli},
  title           = {Characterizing antivirus workload execution},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {90--98},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Aldwairi:2005:CSM,
  author          = {Monther Aldwairi and Thomas Conte and Paul Franzon},
  title           = {Configurable string matching hardware for speeding up intrusion detection},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {99--107},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Milenkovic:2005:UIB,
  author          = {Milena Milenkovi{\'c} and Aleksandar Milenkovi{\'c} and Emil Jovanov},
  title           = {Using instruction block signatures to counter code injection attacks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {108--117},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Zhang:2005:ASP,
  author          = {Youtao Zhang and Jun Yang and Yongjing Lin and Lan Gao},
  title           = {Architectural support for protecting user privacy on trusted processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {118--123},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Shirase:2005:AEC,
  author          = {Masaaki Shirase and Yasushi Hibino},
  title           = {An architecture for elliptic curve cryptography computation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {124--133},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Kgil:2005:CSS,
  author          = {Taeho Kgil and Laura Falk and Trevor Mudge},
  title           = {{ChipLock}: support for secure microarchitectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {134--143},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {Workshop on Architectural Support for Security and Anti-Virus (WASSA)},
}
@Article{Ekman:2005:DLC,
  author          = {Magnus Ekman and Fredrik Warg and Jim Nilsson},
  title           = {An in-depth look at computer performance growth},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {144--147},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Venkateswaran:2005:FTB,
  author          = {N. Venkateswaran and S. Balaji and V. Sridhar},
  title           = {Fault tolerant bus architecture for deep submicron based processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {148--155},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2005:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {1},
  pages           = {156--160},
  month           = mar,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:37 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lee:2005:APC,
  author          = {Ruby B. Lee and Peter C. S. Kwan and John P. McGregor and Jeffrey Dwoskin and Zhenghong Wang},
  title           = {Architecture for Protecting Critical Secrets in Microprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {2--13},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Front-matter item of issue 33(2): paginated in roman numerals (p. ix).
%%% An arabic page 9 would fall inside Lee:2005:APC (pp. 2--13), and the
%%% next front-matter item (Program Chair's Message) starts at p. x.
@Article{Anonymous:2005:GCM,
  author          = {Anonymous},
  title           = {{General Chair}'s Message},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {ix--ix},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Anonymous:2005:PCM,
  author          = {Anonymous},
  title           = {Program {Chair}'s Message},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {x--xv},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Shi:2005:HEC,
  author          = {Weidong Shi and Hsien-Hsin S. Lee and Mrinmoy Ghosh and Chenghuai Lu and Alexandra Boldyreva},
  title           = {High Efficiency Counter Mode Security Architecture via Prediction and Precomputation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {14--24},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Front-matter item of issue 33(2): paginated in roman numerals (p. xvi),
%%% between the Program Chair's Message (x--xv) and Reviewers (xvii--xviii).
%%% An arabic page 16 would fall inside Shi:2005:HEC (pp. 14--24).
@Article{Anonymous:2005:C,
  author          = {Anonymous},
  title           = {Committees},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {xvi--xvi},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Anonymous:2005:R,
  author          = {Anonymous},
  title           = {Reviewers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {xvii--xviii},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Suh:2005:DIA,
  author          = {G. Edward Suh and Charles W. O'Donnell and Ishan Sachdev and Srinivas Devadas},
  title           = {Design and Implementation of the {AEGIS} Single-Chip Secure Processor Using Physical Random Functions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {25--36},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gurumurthi:2005:DDR,
  author          = {Sudhanva Gurumurthi and Anand Sivasubramaniam and Vivek K. Natarajan},
  title           = {Disk Drive Roadmap from the Thermal Perspective: a Case for Dynamic Thermal Management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {38--49},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Huggahalli:2005:DCA,
  author          = {Ram Huggahalli and Ravi Iyer and Scott Tetrick},
  title           = {Direct Cache Access for High Bandwidth Network {I/O}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {50--59},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gunawi:2005:DCS,
  author          = {Haryadi S. Gunawi and Nitin Agrawal and Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau and Jiri Schindler},
  title           = {Deconstructing Commodity Storage Clusters},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {60--71},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ekman:2005:RMM,
  author          = {Magnus Ekman and Per Stenstr{\"o}m},
  title           = {A Robust Main-Memory Compression Scheme},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {74--85},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fahs:2005:CO,
  author          = {Brian Fahs and Todd Rafacz and Sanjay J. Patel and Steven S. Lumetta},
  title           = {Continuous Optimization},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {86--97},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Petric:2005:RRB,
  author          = {Vlad Petric and Tingting Sha and Amir Roth},
  title           = {{RENO}: a Rename-Based Instruction Optimizer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {98--109},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tan:2005:HTS,
  author          = {Lin Tan and Timothy Sherwood},
  title           = {A High Throughput String Matching Architecture for Intrusion Detection and Prevention},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {112--122},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Baboescu:2005:TBR,
  author          = {Florin Baboescu and Dean M. Tullsen and Grigore Rosu and Sumeet Singh},
  title           = {A Tree Based Router Search Engine Architecture with Single Port Memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {123--133},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kyo:2005:IMA,
  author          = {Shorin Kyo and Shin'ichiro Okazaki and Tamio Arai},
  title           = {An Integrated Memory Array Processor Architecture for Embedded Image Recognition Systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {134--145},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Reis:2005:DEH,
  author          = {George A. Reis and Jonathan Chang and Neil Vachharajani and Ram Rangan and David I. August and Shubhendu S. Mukherjee},
  title           = {Design and Evaluation of Hybrid Fault-Detection Systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {148--159},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Schuchman:2005:RMT,
  author          = {Ethan Schuchman and T. N. Vijaykumar},
  title           = {{Rescue}: a Microarchitecture for Testability and Defect Tolerance},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {160--171},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gomaa:2005:OTF,
  author          = {Mohamed A. Gomaa and T. N. Vijaykumar},
  title           = {Opportunistic Transient-Fault Detection},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {172--183},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Balensiefer:2005:EFI,
  author          = {Steven Balensiefer and Lucas Kregor-Stickles and Mark Oskin},
  title           = {An Evaluation Framework and Instruction Set Architecture for Ion-Trap Based Quantum Micro-Architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {186--196},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nazhandali:2005:EOS,
  author          = {Leyla Nazhandali and Bo Zhai and Javin Olson and Anna Reeves and Michael Minuth and Ryan Helfand and Sanjay Pant and Todd Austin and David Blaauw},
  title           = {Energy Optimization of Subthreshold-Voltage Sensor Network Processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {197--207},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hempstead:2005:ULP,
  author          = {Mark Hempstead and Nikhil Tripathi and Patrick Mauro and Gu-Yeon Wei and David Brooks},
  title           = {An Ultra Low Power System Architecture for Sensor Network Applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {208--219},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wenisch:2005:TSS,
  author          = {Thomas F. Wenisch and Stephen Somogyi and Nikolaos Hardavellas and Jangwoo Kim and Anastassia Ailamaki and Babak Falsafi},
  title           = {Temporal Streaming of Shared Memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {222--233},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Moshovos:2005:REC,
  author          = {Andreas Moshovos},
  title           = {{RegionScout}: Exploiting Coarse Grain Sharing in Snoop-Based Coherence},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {234--245},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cantin:2005:IMP,
  author          = {Jason F. Cantin and Mikko H. Lipasti and James E. Smith},
  title           = {Improving Multiprocessor Performance with Coarse-Grain Coherence Tracking},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {246--257},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Hines:2005:IPE,
  author          = {Stephen Hines and Joshua Green and Gary Tyson and David Whalley},
  title           = {Improving Program Efficiency by Packing Instructions into Registers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {260--271},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Clark:2005:AFT,
  author          = {Nathan Clark and Jason Blome and Michael Chu and Scott Mahlke and Stuart Biles and Krisztian Flautner},
  title           = {An Architecture Framework for Transparent Instruction Set Customization in Embedded Processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {272--283},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Narayanasamy:2005:BCR,
  author          = {Satish Narayanasamy and Gilles Pokam and Brad Calder},
  title           = {{BugNet}: Continuously Recording Program Execution for Deterministic Replay Debugging},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {284--295},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Annavaram:2005:MAL,
  author          = {Murali Annavaram and Ed Grochowski and John Shen},
  title           = {Mitigating {Amdahl's Law} through {EPI} Throttling},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {298--309},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {energy per instruction (EPI)},
}
@Article{Talpes:2005:ISP,
  author          = {Emil Talpes and Diana Marculescu},
  title           = {Increased Scalability and Power Efficiency by Using Multiple Speed Pipelines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {310--321},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Petric:2005:EEP,
  author          = {Vlad Petric and Amir Roth},
  title           = {Energy-Effectiveness of Pre-Execution and Energy-Aware {P}-Thread Selection},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {322--333},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Second author's surname carries an acute accent; it is written as a
%%% BibTeX special character so sorting and labels treat it as plain "c".
@Article{Zhang:2005:VRM,
  author          = {Michael Zhang and Krste Asanovi{\'c}},
  title           = {Victim Replication: Maximizing Capacity while Hiding Wire Delay in Tiled Chip Multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {336--345},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Speight:2005:AMP,
  author          = {Evan Speight and Hazim Shafi and Lixin Zhang and Ram Rajamony},
  title           = {Adaptive Mechanisms and Policies for Managing Cache Hierarchies in Chip Multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {346--356},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chishti:2005:ORC,
  author          = {Zeshan Chishti and Michael D. Powell and T. N. Vijaykumar},
  title           = {Optimizing Replication, Communication, and Capacity Allocation in {CMPs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {357--368},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mutlu:2005:TEP,
  author          = {Onur Mutlu and Hyesoon Kim and Yale N. Patt},
  title           = {Techniques for Efficient Processing in Runahead Execution Engines},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {370--381},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Author's surname carries an acute accent, written here as a BibTeX
%%% special character; the citation key deliberately stays plain ASCII.
@Article{Jimenez:2005:PLB,
  author          = {Daniel A. Jim{\'e}nez},
  title           = {Piecewise Linear Branch Prediction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {382--393},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
%%% Author's given name carries an acute accent, written here as a BibTeX
%%% special character so that it sorts and abbreviates as plain "e".
@Article{Seznec:2005:AGH,
  author          = {Andr{\'e} Seznec},
  title           = {Analysis of the {O-GEometric History Length} Branch Predictor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {394--405},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kumar:2005:IMC,
  author          = {Rakesh Kumar and Victor Zyuban and Dean M. Tullsen},
  title           = {Interconnections in Multi-Core Architectures: Understanding Mechanisms, Overheads and Scaling},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {408--419},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kim:2005:MHR,
  author          = {John Kim and William J. Dally and Brian Towles and Amit K. Gupta},
  title           = {Microarchitecture of a High-Radix Router},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {33},
  number          = {2},
  pages           = {420--431},
  month           = may,
  year            = {2005},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 12 09:40:51 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Seo:2005:NOW,
  author =       "Daeho Seo and Akif Ali and Won-Taek Lim and Nauman
                 Rafique and Mithuna Thottethodi",
  title =        "Near-Optimal Worst-Case Throughput Routing for
                 Two-Dimensional Mesh Networks",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "432--443",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Gandhi:2005:SLS,
  author =       "Amit Gandhi and Haitham Akkary and Ravi Rajwar and
                 Srikanth T. Srinivasan and Konrad Lai",
  title =        "Scalable Load and Store Processing in Latency Tolerant
                 Processors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "446--457",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Roth:2005:SVW,
  author =       "Amir Roth",
  title =        "{Store Vulnerability Window (SVW)}: Re-Execution
                 Filtering for Enhanced Load Optimization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "458--468",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Torres:2005:SBD,
  author =       "E. F. Torres and P. Ib{\'a}{\~n}ez and V. Vi{\~n}als
                 and J. M. Llaber{\'\i}a",
  title =        "Store Buffer Design in First-Level Multibanked Data
                 Caches",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "469--480",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Meixner:2005:DVS,
  author =       "Albert Meixner and Daniel J. Sorin",
  title =        "Dynamic Verification of Sequential Consistency",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "482--493",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Rajwar:2005:VTM,
  author =       "Ravi Rajwar and Maurice Herlihy and Konrad Lai",
  title =        "Virtualizing Transactional Memory",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "494--505",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Balakrishnan:2005:IPA,
  author =       "Saisanthosh Balakrishnan and Ravi Rajwar and Mike
                 Upton and Konrad Lai",
  title =        "The Impact of Performance Asymmetry in Emerging
                 Multicore Architectures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "506--517",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Srinivasan:2005:ESD,
  author =       "Jayanth Srinivasan and Sarita V. Adve and Pradip Bose
                 and Jude A. Rivers",
  title =        "Exploiting Structural Duplication for Lifetime
                 Reliability Enhancement",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "520--531",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Biswas:2005:CAV,
  author =       "Arijit Biswas and Paul Racunas and Razvan Cheveresan
                 and Joel Emer and Shubhendu S. Mukherjee and Ram
                 Rangan",
  title =        "Computing Architectural Vulnerability Factors for
                 Address-Based Structures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "532--543",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Qureshi:2005:VWC,
  author =       "Moinuddin K. Qureshi and David Thompson and Yale N.
                 Patt",
  title =        "The {V-Way Cache}: Demand Based Associativity via
                 Global Replacement",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "544--555",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2005:AI,
  author =       "Anonymous",
  title =        "Author Index",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "2",
  pages =        "556--557",
  month =        may,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:40:51 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Bartolini:2005:GEI,
  author =       "S. Bartolini and P. Foglia and C. A. Prete",
  title =        "{Guests editors'} introduction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "1--2",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101870",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In this issue of ACM SigArch Newsletter, we present
                 eight papers from the MEDEA Workshop, held in
                 conjunction with the International Conference on
                 Parallel Architectures and Compilation Techniques
                 (PACT-2004) [1], [2].",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Fradj:2005:EAM,
  author =       "Hanene Ben Fradj and Asmaa el Ouardighi and C{\'e}cile
                 Belleudy and Michel Auguin",
  title =        "Energy aware memory architecture configuration",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "3--9",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101871",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In the context of battery-driven embedded systems,
                 reducing energy while maintaining performance is one of
                 today's challenges. The on-chip memory count for a
                 great part of the whole system consumption, especially
                 for images and video processing applications that make
                 heavy use of large memory data size. In this paper, we
                 present new technique for efficiently exploiting
                 on-chip memory space (cache, scratchpad) for a specific
                 application to reduce the energy consumption without
                 loss of performance. We configure and compare the
                 impact of three different memory architectures on the
                 energy consumption. The first one is composed of main
                 memory with cache, in the second architecture we find a
                 main memory and scratchpad memory and in the last
                 architecture we combine both cache and scratchpad with
                 the main memory. We show the effectiveness of the last
                 architecture and a saving about 35\% in energy
                 consumption.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Suh:2005:DOC,
  author =       "Hyo-Joong Suh and Sung Woo Chung",
  title =        "{DRACO}: optimized {CC-NUMA} system with novel
                 dual-link interconnections to reduce the memory
                 latency",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "10--16",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101872",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The performances of multiprocessor systems mainly rely
                 on the processor clock speed and the memory latency. As
                 the processors speed up rapidly, the memory latency
                 becomes a major performance bottleneck in
                 multiprocessor systems. In this paper, we propose a
                 dual-link interconnection topology and its effective
                 routing scheme to reduce the remote memory latency on
                 the interconnection network. It can be applied at a
                 same implementation cost as traditional bi-directional
                 ring systems. We compare the performance of the
                 proposed system to that of the traditional
                 bi-directional ring-based system and toroidal
                 mesh-based system. By simulations, it is shown that the
                 proposed system outperforms the traditional
                 bi-directional ring-based system by 42--101\% and
                 excels the toroidal mesh-based system by 4--14\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Yehia:2005:LSA,
  author =       "Sami Yehia and Jean-Fran{\c{c}}ois Collard and Olivier
                 Temam",
  title =        "Load squared: adding logic close to memory to reduce
                 the latency of indirect loads with high miss ratios",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "17--24",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101873",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Indirect memory accesses, where a load is fed by
                 another load, are ubiquitous because of rich data
                 structures and sophisticated software conventions, such
                 as the use of linkage tables and position independent
                 code. Unfortunately, they can be costly: if both loads
                 miss, two round trips to memory are required even
                 though the role of the first load is often limited to
                 fetching the address of the second load. To reduce the
                 total latency of such indirect accesses, a new
                 instruction called load squared is introduced. A load
                 squared does two fetches, the first fetch reading the
                 target address of the second. (An offset is optionally
                 added to the result of the first fetch.) The load
                 squared operation is performed by memory-side logic
                 (typically, the memory controller if it isn't located
                 on the main processor chip). In this study, load
                 squared is not an architecturally visible instruction:
                 the micro-architecture transparently decides which
                 loads should be replaced by loads squared. We show that
                 performance is sometimes improved significantly, and
                 never degraded.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kobayashi:2005:LAC,
  author =       "Hiroaki Kobayashi and Isao Kotera and Hiroyuki
                 Takizawa",
  title =        "Locality analysis to control dynamically way-adaptable
                 caches",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "25--32",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101874",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This paper presents a control mechanism for
                 dynamically way-adaptable caches. The mechanism uses
                 the local and global information about the locality of
                 reference during execution. As the local information,
                 the cache access pattern is evaluated based on the
                 statistics of the LRU (Least-Recently Used) states of
                 cache entries referenced. If the memory accesses are
                 concentrated on and near the most recently used
                 entries, the mechanism knows that the locality of
                 reference is very high and there is room to decrease
                 the number of ways activated to fit the current
                 locality. On the other hand, if the accesses are widely
                 distributed from the most recently used entries to the
                 least recently used ones, the mechanism understands
                 that more ways are needed to improve the performance as
                 long as the resources are available. In addition, to
                 examine the global behavior of the locality of
                 reference, an n-bit state machine like n-bit branch
                 predictors is introduced into the mechanism. The state
                 machine traces a sequence of cache resizing requests
                 and evaluates its stability across the execution time.
                 Therefore, the state machine helps the mechanism avoid
                 unstable actions for enabling/disabling cache ways when
                 the locality shows the highly irregular behavior. The
                 experimental results indicate that an n-bit asymmetric
                 state machine using the LRU status information works
                 well to appropriately control cache ways even in the
                 case of the benchmarks with highly-irregular access
                 behaviors in cache references.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Arakawa:2005:SXE,
  author =       "F. Arakawa and M. Ishikawa and Y. Kondo and T. Kamei
                 and M. Ozawa and O. Nishii and T. Hattori",
  title =        "{SH-X}: an embedded processor core for consumer
                 appliances",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "33--40",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101875",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "A SuperH\TM{} embedded processor core SH-X implemented
                 in a 130-nm CMOS process running at 400 MHz achieved
                 720 MIPS and 2.8 GFLOPS at a power of 250 mW under
                 worst-case conditions. It has a dual-issue seven-stage
                 pipeline architecture, but reaches the 1.8 MIPS/MHz of
                 the previous five-stage processor. The on-chip memory
                 configuration is tuned for digital consumer appliances.
                 A new resume-standby mode enables a standby current of
                 less than 100$ \mu $A and a 3-ms recovery time. The
                 processor meets the requirements of a wide range of
                 applications, and is suitable for digital appliances
                 aimed at the consumer market, such as cellular phones,
                 digital still/video cameras, and car navigation
                 systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Naz:2005:IDC,
  author =       "Afrin Naz and Mehran Rezaei and Krishna Kavi and
                 Philip Sweany",
  title =        "Improving data cache performance with integrated use
                 of split caches, victim cache and stream buffers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "41--48",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101876",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In our prior work we explored a cache organization
                 providing architectural support for distinguishing
                 between memory references that exhibit spatial and
                 temporal locality and mapping them to separate caches.
                 That work showed that using separate (data) caches for
                 indexed or stream data and scalar data items could lead
                 to substantial improvements in terms of cache misses.
                 In addition, such a separation allowed for the design
                 of caches that could be tailored to meet the properties
                 exhibited by different data items. In this paper, we
                 investigate the interaction between three established
                 methods: split cache, victim cache and stream buffer.
                 Since significant amounts of compulsory and conflict
                 misses are avoided, the size of each cache (i.e., array
                 and scalar), as well as the combined cache capacity can
                 be reduced. Our results show that on average 55\%
                 reduction in miss rates over the base configuration.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  keywords =     "array cache; memory access time; scalar cache; stream
                 buffer; victim cache",
}
@Article{Pajuelo:2005:SEH,
  author =       "Alex Pajuelo and Antonio Gonz{\'a}lez and Mateo
                 Valero",
  title =        "Speculative execution for hiding memory latency",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "49--56",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101877",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "L2 misses are one of the main causes for stalling the
                 activity in current and future microprocessors. In this
                 paper we present a mechanism to speculatively execute
                 independent instructions of L2-miss loads, even if no
                 entry in the reorder buffer is available. The proposed
                 mechanism generates future instances of instructions
                 that are expected to be independent of the delinquent
                 load. When these dynamic instructions are later
                 fetched, they use the previously precomputed data and
                 directly go to the commit stage without executing. The
                 mechanism replicates strided loads found above the
                 L2-miss load, that produce the data for the target
                 independent instructions. Instructions following the
                 L2-miss load will check if their source operands have
                 been replicated. In this case, multiple speculative
                 instances of them will also be generated. This
                 mechanism is built on top of a superscalar processor
                 with an aggressive prefetch scheme. Compared to this
                 baseline, the mechanism obtains 21\% of performance
                 improvement.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Verdu:2005:ITA,
  author =       "Javier Verd{\'u} and Jorge Garc{\'\i}a and Mario
                 Nemirovsky and Mateo Valero",
  title =        "The impact of traffic aggregation on the memory
                 performance of networking applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "57--62",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1152922.1101878",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The trend of the networking processing is to increase
                 the intelligence of the routers (i.e. security
                 capacities). This means that there is an increment in
                 the workload generated per packet and new types of
                 applications are emerging, such as stateful programs.
                 On the other hand, Internet traffic continues to grow
                 vigorously. This fact involves an increment of the
                 traffic aggregation levels and overloads the processing
                 capacities of the routers. In this paper we show the
                 importance of traffic aggregation level on networking
                 application studies. We also classify the applications
                 according to the data management of the packet
                 processing. Hence, we present the different impacts on
                 the data cache performance depending on the application
                 category. Our results show that traffic aggregation
                 level may affect the cache performance depending on the
                 networking application category. Stateful applications
                 show a significant sensitivity to this impact.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Allu:2005:ERC,
  author =       "Bramha Allu and Wei Zhang",
  title =        "Exploiting the replication cache to improve
                 performance for multiple-issue microprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "63--71",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1101868.1101880",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Performance and reliability are both of great
                 importance for microprocessor design. Recently, the
                 replication cache has been proposed to enhance data
                 cache reliability against soft errors. The replication
                 cache is a small fully associative cache to store the
                 replica for every write to the L1 data cache. In
                 addition to enhance data reliability, this paper
                 proposes several cost-effective techniques to improve
                 performance of multiple-issue microprocessors by
                 exploiting the replication cache. The idea is to make
                 use of the replication cache to increase cache
                 bandwidth through dual load and to reduce the L1 data
                 cache miss rate through partial victim caching. Built
                 upon these two schemes, we also propose a hybrid
                 approach to combine the benefits of both dual load and
                 partial victim caching for improving performance
                 further. Our experimental results show that exploiting
                 a replication cache with only 8 entries can improve
                 performance by 13.0\% on average without compromising
                 the enhanced data integrity.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2005:INb,
  author =       "Mark Thorson",
  title =        "{Internet} nuggets",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "72--74",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1101868.1101882",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This column consists of selected traffic from the
                 comp.arch newsgroup, a forum for discussion of computer
                 architecture on the Internet---an international
                 computer network.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2005:MW,
  author =       "Anonymous",
  title =        "{MEDEA 2004} workshop",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "3",
  pages =        "??--??",
  month =        jun,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Jun 17 12:06:44 MDT 2008",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Jouppi:2005:ISI,
  author =       "Norman P. Jouppi and Rakesh Kumar and Dean Tullsen",
  title =        "Introduction to the special issue on the {2005
                 Workshop on Design, Analysis, and Simulation of Chip
                 Multiprocessors (dasCMP'05)}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "4--4",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Laudon:2005:PWN,
  author =       "James Laudon",
  title =        "Performance\slash Watt: the new server focus",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "5--13",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Davis:2005:RRA,
  author =       "John D. Davis and Cong Fu and James Laudon",
  title =        "The {RASE (Rapid, Accurate Simulation Environment)}
                 for chip multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "14--23",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Hsu:2005:ECD,
  author =       "Lisa Hsu and Ravi Iyer and Srihari Makineni and Steve
                 Reinhardt and Donald Newell",
  title =        "Exploring the cache design space for large scale
                 {CMPs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "24--33",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Davis:2005:CPS,
  author =       "John D. Davis and Stephen E. Richardson and Charis
                 Charitsis and Kunle Olukotun",
  title =        "A chip prototyping substrate: the flexible
                 architecture for simulation and testing {(FAST)}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "34--43",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Vachharajani:2005:CMP,
  author =       "Neil Vachharajani and Matthew Iyer and Chinmay Ashok
                 and Manish Vachharajani and David I. August and Daniel
                 Connors",
  title =        "Chip multi-processor scalability for single-threaded
                 applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "44--53",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Chen:2005:HMP,
  author =       "Julia Chen and Philo Juang and Kevin Ko and Gilberto
                 Contreras and David Penry and Ram Rangan and Adam
                 Stoler and Li-Shiuan Peh and Margaret Martonosi",
  title =        "Hardware-modulated parallelism in chip
                 multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "54--63",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Sampson:2005:FSC,
  author =       "Jack Sampson and Rub{\'e}n Gonz{\'a}lez and
                 Jean-Fran{\c{c}}ois Collard and Norman P. Jouppi and
                 Mike Schlansker",
  title =        "Fast synchronization for chip multiprocessors",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "64--69",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Shayesteh:2005:DCS,
  author =       "Anahita Shayesteh and Glenn Reinman and Norman Jouppi
                 and Suleyman Sair and Tim Sherwood",
  title =        "Dynamically configurable shared {CMP} helper engines
                 for improved performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "70--79",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Constantinou:2005:PIS,
  author =       "Theofanis Constantinou and Yiannakis Sazeides and
                 Pierre Michaud and Damien Fetis and Andr{\'e} Seznec",
  title =        "Performance implications of single thread migration on
                 a chip multi-core",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "80--91",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "Special issue: dasCMP'05.",
}
@Article{Martin:2005:MGE,
  author =       "Milo M. K. Martin and Daniel J. Sorin and Bradford M.
                 Beckmann and Michael R. Marty and Min Xu and Alaa R.
                 Alameldeen and Kevin E. Moore and Mark D. Hill and
                 David A. Wood",
  title =        "Multifacet's general execution-driven multiprocessor
                 simulator {(GEMS)} toolset",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "92--99",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2005:DMS,
  author =       "David Wang and Brinda Ganesh and Nuengwong Tuaycharoen
                 and Kathleen Baynes and Aamer Jaleel and Bruce Jacob",
  title =        "{DRAMsim}: a memory system simulator",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "100--107",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Rountree:2005:NH,
  author =       "Barry Rountree and Robert Springer and David K.
                 Lowenthal and Vincent W. Freeh",
  title =        "Notes from {HPPAC 2005}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "33",
  number =       "4",
  pages =        "108--112",
  month =        nov,
  year =         "2005",
  CODEN =        "CANED2",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri May 12 09:41:08 MDT 2006",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2005:GFB,
author = "H. C. Wang and C. K. Yuen",
title = "A general framework to build new {CPUs} by mapping
abstract machine code to instruction level parallel
execution hardware",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "4",
pages = "113--120",
month = nov,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sam:2005:IMS,
author = "Nana B. Sam and Martin Burtscher",
title = "Improving memory system performance with
energy-efficient value speculation",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "4",
pages = "121--127",
month = nov,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2005:INc,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "4",
pages = "128--133",
month = nov,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:08 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kaeli:2005:WIS,
author = "David Kaeli and Robert Cohn",
title = "{WBIA'05}: Introduction to the special issue",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "1--2",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Hu:2005:CCI,
author = "Chunling Hu and John McCabe and Daniel A. Jim{\'e}nez
and Ulrich Kremer",
title = "The {Camino Compiler} infrastructure",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "3--8",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Schulz:2005:SDB,
author = "Martin Schulz and Dong Ahn and Andrew Bernat and
Bronis R. de Supinski and Steven Y. Ko and Gregory Lee
and Barry Rountree",
title = "Scalable dynamic binary instrumentation for {Blue
Gene/L}",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "9--14",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Borin:2005:DBC,
author = "Edson Borin and Cheng Wang and Youfeng Wu and Guido
Araujo",
title = "Dynamic binary control-flow errors detection",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "15--20",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Moffie:2005:AAS,
author = "Micha Moffie and David Kaeli",
title = "{ASM}: application security monitor",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "21--26",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Zhao:2005:DMO,
author = "Qin Zhao and Rodric Rabbah and Weng-Fai Wong",
title = "Dynamic memory optimization using pool allocation and
prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "27--32",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Gao:2005:AAL,
author = "Xiaofeng Gao and Beth Simon and Allan Snavely",
title = "{ALITER}: an asynchronous lightweight instrumentation
tool for event recording",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "33--38",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{McCurdy:2005:UPM,
author = "Collin McCurdy and Charles Fischer",
title = "Using {Pin} as a memory reference generator for
multiprocessor simulation",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "39--44",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Pan:2005:CPE,
author = "Heidi Pan and Krste Asanovi{\'c} and Robert Cohn and
Chi-Keung Luk",
title = "Controlling program execution through binary
instrumentation",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "45--50",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Faroughi:2005:PPP,
author = "Nikrouz Faroughi",
title = "Profiling of parallel processing programs on shared
memory multiprocessors using {Simics}",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "51--56",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Kumar:2005:TDD,
author = "Naveen Kumar and Ramesh Peri",
title = "Transparent debugging of dynamically instrumented
programs",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "57--62",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Harris:2005:PAS,
author = "Laune C. Harris and Barton P. Miller",
title = "Practical analysis of stripped binary code",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "63--68",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Reddi:2005:PDC,
author = "Vijay Janapa Reddi and Dan Connors and Robert S.
Cohn",
title = "Persistence in dynamic code transformation systems",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "69--74",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Srinivasan:2005:MMC,
author = "Ram Srinivasan and Olaf Lubeck",
title = "{MonteSim}: a {Monte Carlo} performance model for
in-order microarchitectures",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "75--80",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Laurenzano:2005:LCT,
author = "Michael Laurenzano and Beth Simon and Allan Snavely
and Meghan Gunn",
title = "Low cost trace-driven memory simulation using
{SimPoint}",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "81--86",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "WBIA'05",
}
@Article{Thorson:2005:INd,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "33",
number = "5",
pages = "87--93",
month = dec,
year = "2005",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:24 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bartolini:2006:MPD,
author = "S. Bartolini and P. Foglia and R. Giorgi and C. A.
Prete",
title = "Memory performance: dealing with applications, systems
and architecture",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "1--2",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Friedman:2006:DCR,
author = "Scott Friedman and Praveen Krishnamurthy and Roger
Chamberlain and Ron K. Cytron and Jason E. Fritts",
title = "Dusty caches for reference counting garbage
collection",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "3--10",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ramaswamy:2006:DTC,
author = "Subramanian Ramaswamy and Jaswanth Sreeram and
Sudhakar Yalamanchili and Krishna V. Palem",
title = "Data trace cache: an application specific cache
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "11--18",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Naz:2006:MCS,
author = "Afrin Naz and Krishna Kavi and Mehran Rezaei and
Wentong Li",
title = "Making a case for split data caches for embedded
applications",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "19--26",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Allu:2006:ERC,
author = "B. Allu and W. Zhang and M. Kandala",
title = "Exploiting the replication cache to improve cache read
bandwidth cost effectively",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "27--32",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Monchiero:2006:EST,
author = "Matteo Monchiero and Gianluca Palermo and Cristina
Silvano and Oreste Villa",
title = "An efficient synchronization technique for
multiprocessor systems on-chip",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "33--40",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khunjush:2006:HMD,
author = "Farshad Khunjush and Nikitas J. Dimopoulos",
title = "Hiding message delivery and reducing memory access
latency by providing direct-to-cache transfer during
receive operations in a message passing environment",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "41--48",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yue:2006:NCB,
author = "Yao Yue and Chuang Lin and Zhangxi Tan",
title = "{NPCryptBench}: a cryptographic benchmark suite for
network processors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "49--56",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lopez-Lagunas:2006:MBO,
author = "Abelardo L{\'o}pez-Lagunas and Sek M. Chai",
title = "Memory bandwidth optimization through stream
descriptors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "57--64",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chiyonobu:2006:EEI,
author = "Akihiro Chiyonobu and Toshinori Sato",
title = "Energy-efficient instruction scheduling utilizing
cache miss information",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "65--70",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bardine:2006:AEV,
author = "Alessandro Bardine and Alessio Bechini and
Pierfrancesco Foglia and Cosimo Antonio Prete",
title = "Analysis of embedded video coder systems: a
system-level approach",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "71--76",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gontmakher:2006:ILG,
author = "Alex Gontmakher and Assaf Schuster and Avi Mendelson",
title = "{Inthreads}: a low granularity parallelization model",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "77--80",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2006:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "1",
pages = "81--86",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Patt:2006:CAR,
author = "Yale Patt",
title = "Computer Architecture Research and Future
Microprocessors: Where Do We Go from Here?",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "2--2",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2006:GDE,
author = "Jongman Kim and Chrysostomos Nicopoulos and Dongkook
Park",
title = "A Gracefully Degrading and Energy-Efficient Modular
Router Architecture for On-Chip Networks",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "4--15",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2006:MGC,
author = "Anonymous",
title = "Message from the General Chair",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "10--10",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2006:MPC,
author = "Anonymous",
title = "Message from the Program Chair",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "11--11",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2006:R,
author = "Anonymous",
title = "Reviewers",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "14--14",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Scott:2006:BHR,
author = "Steve Scott and Dennis Abts and John Kim and William
J. Dally",
title = "The {BlackWidow} High-Radix {Clos} Network",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "16--28",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2006:SG,
author = "Anonymous",
title = "{SIGARCH} Guidelines",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "17--17",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Arvind:2006:MMI,
author = "{Arvind} and Jan-Willem Maessen",
title = "Memory Model $=$ Instruction Reordering $+$ Store
Atomicity",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "29--40",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{vonPraun:2006:CMO,
author = "Christoph von Praun and Harold W. Cain and Jong-Deok
Choi and Kyung Dong Ryu",
title = "Conditional Memory Ordering",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "41--52",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{McDonald:2006:ASP,
author = "Austen McDonald and JaeWoong Chung and Brian D.
Carlstrom and Chi Cao Minh and Hassan Chafi and
Christos Kozyrakis and Kunle Olukotun",
title = "Architectural Semantics for Practical Transactional
Memory",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "53--65",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ranganathan:2006:ELP,
author = "Parthasarathy Ranganathan and Phil Leech and David
Irwin and Jeffrey Chase",
title = "Ensemble-level Power Management for Dense Blade
Servers",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "66--77",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Donald:2006:TMT,
author = "James Donald and Margaret Martonosi",
title = "Techniques for Multicore Thermal Management:
Classification and New Exploration",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "78--88",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:2006:SLP,
author = "Yuan Lin and Hyunseok Lee and Mark Woh and Yoav Harel
and Scott Mahlke and Trevor Mudge and Chaitali
Chakrabarti and Krisztian Flautner",
title = "{SODA}: a Low-power Architecture For Software Radio",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "89--101",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shi:2006:IFD,
author = "Weidong Shi and Hsien-Hsin S. Lee and Laura Falk and
Mrinmoy Ghosh",
title = "An Integrated Framework for Dependable and Revivable
Architectures Using Multicore Processors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "102--113",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hankins:2006:MIS,
author = "Richard A. Hankins and Gautham N. Chinya and Jamison
D. Collins and Perry H. Wang and Ryan Rakvic and Hong
Wang and John P. Shen",
title = "Multiple Instruction Stream Processor",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "114--127",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Emma:2006:ESR,
author = "Philip Emma",
title = "The End of Scaling? Revolutions in Technology and
Microarchitecture as We Pass the 90 Nanometer Node",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "128--128",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Li:2006:DMC,
author = "Feihui Li and Chrysostomos Nicopoulos and Thomas
Richardson and Yuan Xie and Vijaykrishnan Narayanan and
Mahmut Kandemir",
title = "Design and Management of {$3$D} Chip Multiprocessors
Using Network-in-Memory",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "130--141",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Garg:2006:SMD,
author = "Alok Garg and M. Wasiur Rashid and Michael Huang",
title = "Slackened Memory Dependence Enforcement: Combining
Opportunistic Forwarding with Decoupled Verification",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "142--154",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2006:BCR,
author = "Chuanjun Zhang",
title = "Balanced Cache: Reducing Conflict Misses of
Direct-Mapped Caches",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "155--166",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Qureshi:2006:CMA,
author = "Moinuddin K. Qureshi and Daniel N. Lynch and Onur
Mutlu and Yale N. Patt",
title = "A Case for {MLP}-Aware Cache Replacement",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "167--178",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yan:2006:ICP,
author = "Chenyu Yan and Daniel Englender and Milos Prvulovic
and Brian Rogers and Yan Solihin",
title = "Improving Cost, Performance, and Security of Memory
Encryption and Authentication",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "179--190",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brodie:2006:SAH,
author = "Benjamin C. Brodie and David E. Taylor and Ron K.
Cytron",
title = "A Scalable Architecture For High-Throughput
Regular-Expression Pattern Matching",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "191--202",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hasan:2006:CSE,
author = "Jahangir Hasan and Srihari Cadambi and Venkatta
Jakkula and Srimat Chakradhar",
title = "{Chisel}: a Storage-efficient, Collision-free
Hash-based Network Processing Architecture",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "203--215",
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Colohan:2006:TDB,
author = "Christopher B. Colohan and Anastassia Ailamaki and J.
Gregory Steffan and Todd C. Mowry",
title = "Tolerating Dependences Between Large Speculative
Threads Via Sub-Threads",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "216--226",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ceze:2006:BDS,
author = "Luis Ceze and James Tuck and Josep Torrellas and Calin
Cascaval",
title = "Bulk Disambiguation of Speculative Threads in
Multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "227--238",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Choi:2006:LBS,
author = "Seungryul Choi and Donald Yeung",
title = "Learning-Based {SMT} Processor Resource Distribution
via Hill-Climbing",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "239--251",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Somogyi:2006:SMS,
author = "Stephen Somogyi and Thomas F. Wenisch and Anastassia
Ailamaki and Babak Falsafi and Andreas Moshovos",
title = "Spatial Memory Streaming",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "252--263",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chang:2006:CCC,
author = "Jichuan Chang and Gurindar S. Sohi",
title = "Cooperative Caching for Chip Multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "264--276",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hu:2006:RST,
author = "Shiliang Hu and James E. Smith",
title = "Reducing Startup Time in Co-Designed Virtual
Machines",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "277--288",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yang:2006:TAD,
author = "Qing Yang and Weijun Xiao and Jin Ren",
title = "{TRAP}-Array: a Disk Array Architecture Providing
Timely Recovery to Any Point-in-time",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "289--301",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Balakrishnan:2006:PDD,
author = "Saisanthosh Balakrishnan and Gurindar S. Sohi",
title = "Program Demultiplexing: Data-flow based Speculative
Parallelization of Methods in Sequential Programs",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "302--313",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Swanson:2006:APT,
author = "Steven Swanson and Andrew Putnam and Martha Mercaldi
and Ken Michelson and Andrew Petersen and Andrew
Schwerin and Mark Oskin and Susan J. Eggers",
title = "Area-Performance Trade-offs in Tiled Dataflow
Architectures",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "314--326",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Strauss:2006:FSA,
author = "Karin Strauss and Xiaowei Shen and Josep Torrellas",
title = "Flexible Snooping: Adaptive Forwarding and Filtering
of Snoops in Embedded-Ring Multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "327--338",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheng:2006:IAC,
author = "Liqun Cheng and Naveen Muralimanohar and Karthik
Ramani and Rajeev Balasubramonian and John B. Carter",
title = "Interconnect-Aware Coherence Protocols for Chip
Multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "339--351",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Herrod:2006:FVT,
author = "Steve Herrod",
title = "The Future of Virtualization Technology",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "352--352",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{VanMeter:2006:DAQ,
author = "Rodney {Van Meter} and Kae Nemoto and W. J. Munro and
Kohei M. Itoh",
title = "Distributed Arithmetic on a Quantum Multicomputer",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "354--365",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Isailovic:2006:INS,
author = "Nemanja Isailovic and Yatish Patel and Mark Whitney
and John Kubiatowicz",
title = "Interconnection Networks for Scalable Quantum
Computers",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "366--377",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thaker:2006:QMH,
author = "Darshan D. Thaker and Tzvetan S. Metodi and Andrew W.
Cross and Isaac L. Chuang and Frederic T. Chong",
title = "Quantum Memory Hierarchies: Efficient Designs to Match
Available Parallelism in Quantum Computing",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "378--390",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2006:AI,
author = "Anonymous",
title = "Author Index",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "2",
pages = "391--391",
month = may,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Mon Aug 21 15:00:05 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Burtscher:2006:TTA,
  author = {Martin Burtscher},
  title = {{TCgen 2.0}: a tool to automatically generate lossless
    trace compressors},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {3},
  pages = {1--8},
  month = jun,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Mon Sep 4 12:39:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Kumar:2006:LLB,
  author = {Abhas Kumar and Nisheet Jain and Mainak Chaudhuri},
  title = {Long-latency branches: how much do they matter?},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {3},
  pages = {9--15},
  month = jun,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Mon Sep 4 12:39:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2006:INb,
  author = {Mark Thorson},
  title = {{Internet} nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {3},
  pages = {16--21},
  month = jun,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Mon Sep 4 12:39:50 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Henning:2006:SCB,
  author = {John L. Henning},
  title = {{SPEC CPU2006} benchmark descriptions},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {4},
  pages = {1--17},
  month = sep,
  year = {2006},
  CODEN = {CANED2},
  DOI = {https://doi.org/10.1145/1186736.1186737},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Tue Jun 17 12:07:09 MDT 2008},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {On August 24, 2006, the Standard Performance
    Evaluation Corporation (SPEC) announced CPU2006 [2],
    which replaces CPU2000. The SPEC CPU benchmarks are
    widely used in both industry and academia [3].},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Citron:2006:HGM,
  author = {Daniel Citron and Adham Hurani and Alaa Gnadrey},
  title = {The harmonic or geometric mean: does it really
    matter?},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {4},
  pages = {18--25},
  month = sep,
  year = {2006},
  CODEN = {CANED2},
  DOI = {https://doi.org/10.1145/1186736.1186738},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Tue Jun 17 12:07:09 MDT 2008},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {For several decades, computer scientists have been
    arguing which mean is more appropriate for summarizing
    computer performance: the harmonic or the geometric. We
    show that many test cases used in the past to discredit
    one mean or the other are either artificial or
    incidental. Changing only one of the benchmarks may
    result in totally different conclusions. In addition,
    we conclude that for the SPEC CPU2000 benchmark suite,
    the choice of averaging has very little influence on
    the relative standing of different machines. Therefore,
    the decision to purchase one system rather then another
    should not be influenced by the type of averaging
    used.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Poe:2006:BBS,
  author = {James Poe and Tao Li},
  title = {{BASS}: a benchmark suite for evaluating architectural
    security systems},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {4},
  pages = {26--33},
  month = sep,
  year = {2006},
  CODEN = {CANED2},
  DOI = {https://doi.org/10.1145/1186736.1186739},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Tue Jun 17 12:07:09 MDT 2008},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {As software vulnerabilities continue to be exposed on
    a daily basis and the motivation of cunning adversaries
    to compromise valuable computer assets grows, novel
    methods must be developed to ensure security. Recently
    there has been a growing interest within the computer
    architecture research community in designing
    architectural and hardware mechanisms to improve
    security. Unfortunately, there is currently not a
    representative set of benchmarks for evaluating the
    security features of proposed hardware modifications.
    The frequent result is that great effort is often spent
    searching for vulnerable programs, and/or evaluations
    suffer from a lack of diversity. To address this
    problem, we developed BASS, a benchmark suite to
    evaluate the security features of proposed
    architectural solutions under various malicious attack
    scenarios. BASS v 1.0 currently consists of seven
    benchmarks chosen to cover a diverse range of
    architectural attack characteristics. To facilitate the
    use of these benchmarks in architectural security
    research, we have developed both vulnerable programs
    and scripts to automatically generate exploits
    targeting those vulnerable programs across both 32-bit
    x86 and 64-bit Alpha Linux platforms. The entire BASS
    framework including documentation, source code, input
    data sets, and precompiled binaries for the M5 full
    system simulator is released under the Gnu GPL and can
    be freely downloaded at
    http://www.ideal.ece.ufl.edu/bass.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Thorson:2006:IN,
  author = {Mark Thorson},
  title = {{Internet} nuggets},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {4},
  pages = {34--37},
  month = sep,
  year = {2006},
  CODEN = {CANED2},
  DOI = {https://doi.org/10.1145/1186736.1186741},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Tue Jun 17 12:07:09 MDT 2008},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {This column consists of selected traffic from the
    comp.arch newsgroup, a forum for discussion of computer
    architecture on the Internet---an international
    computer network. As always, the opinions expressed in
    this column are the personal views of the authors, and
    do not necessarily represent the institutions to which
    they are affiliated. Text which sets the context of a
    message appears underlined or in italics; this is
    usually text the author has quoted from earlier
    messages. The code-like expressions below the authors'
    names are their addresses on Internet.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Rosenblum:2006:IVC,
  author = {Mendel Rosenblum},
  title = {Impact of virtualization on computer architecture and
    operating systems},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {1--1},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Adams:2006:CSH,
  author = {Keith Adams and Ole Agesen},
  title = {A comparison of software and hardware techniques for
    {x86} virtualization},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {2--13},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Jones:2006:GMB,
  author = {Stephen T. Jones and Andrea C. Arpaci-Dusseau and
    Remzi H. Arpaci-Dusseau},
  title = {{Geiger}: monitoring the buffer cache in a virtual
    machine environment},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {14--24},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Crandall:2006:TSD,
  author = {Jedidiah R. Crandall and Gary Wassermann and Daniela
    A. S. de Oliveira and Zhendong Su and S. Felix Wu and
    Frederic T. Chong},
  title = {Temporal search: detecting hidden malware timebombs
    with virtual machines},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {25--36},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Lu:2006:ADA,
  author = {Shan Lu and Joseph Tucek and Feng Qin and Yuanyuan
    Zhou},
  title = {{AVIO}: detecting atomicity violations via access
    interleaving invariants},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {37--48},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Xu:2006:RTR,
  author = {Min Xu and Mark D. Hill and Rastislav Bodik},
  title = {A regulated transitive reduction ({RTR}) for longer
    memory race recording},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {49--60},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Bond:2006:BBE,
  author = {Michael D. Bond and Kathryn S. McKinley},
  title = {{Bell}: bit-encoding online memory leak detection},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {61--72},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Shyam:2006:ULC,
  author = {Smitha Shyam and Kypros Constantinides and Sujay
    Phadke and Valeria Bertacco and Todd Austin},
  title = {Ultra low-cost defect protection for microprocessor
    pipelines},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {73--82},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Reddy:2006:UPB,
  author = {Vimal K. Reddy and Eric Rotenberg and Sailashri
    Parthasarathy},
  title = {Understanding prediction-based partial redundant
    threading for low-overhead, high-coverage fault
    tolerance},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {83--94},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Parashar:2006:SSB,
  author = {Angshuman Parashar and Anand Sivasubramaniam and
    Sudhanva Gurumurthi},
  title = {{SlicK}: slice-based locality exploitation for
    efficient redundant multithreading},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {95--105},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Heath:2006:MFT,
author = "Taliver Heath and Ana Paula Centeno and Pradeep George
and Luiz Ramos and Yogesh Jaluria and Ricardo
Bianchini",
title = "{Mercury} and {Freon}: temperature emulation and
management for server systems",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "5",
pages = "106--116",
month = dec,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Oct 27 06:18:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Kgil:2006:PUS,
  author = {Taeho Kgil and Shaun D'Souza and Ali Saidi and Nathan
    Binkert and Ronald Dreslinski and Trevor Mudge and
    Steven Reinhardt and Krisztian Flautner},
  title = {{PicoServer}: using {$3$D} stacking technology to
    enable a compact energy efficient chip multiprocessor},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {117--128},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Coons:2006:SPS,
  author = {Katherine E. Coons and Xia Chen and Doug Burger and
    Kathryn S. McKinley and Sundeep K. Kushwaha},
  title = {A spatial path scheduling algorithm for {EDGE}
    architectures},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {129--140},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Mercaldi:2006:IST,
  author = {Martha Mercaldi and Steven Swanson and Andrew Petersen
    and Andrew Putnam and Andrew Schwerin and Mark Oskin
    and Susan J. Eggers},
  title = {Instruction scheduling for a tiled dataflow
    architecture},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {141--150},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Gordon:2006:ECG,
  author = {Michael I. Gordon and William Thies and Saman
    Amarasinghe},
  title = {Exploiting coarse-grained task, data, and pipeline
    parallelism in stream programs},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {151--162},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Mishra:2006:TES,
  author = {Mahim Mishra and Timothy J. Callahan and Tiberiu
    Chelcea and Girish Venkataramani and Seth C. Goldstein
    and Mihai Budiu},
  title = {{Tartan}: evaluating spatial computation for whole
    program execution},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {163--174},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Eyerman:2006:PCA,
  author = {Stijn Eyerman and Lieven Eeckhout and Tejas Karkhanis
    and James E. Smith},
  title = {A performance counter architecture for computing
    accurate {CPI} components},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {175--184},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Lee:2006:AER,
  author = {Benjamin C. Lee and David M. Brooks},
  title = {Accurate and efficient regression modeling for
    microarchitectural performance and power prediction},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {185--194},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Ipek:2006:EEA,
author = "Engin {\.I}pek and Sally A. McKee and Rich Caruana and
Bronis R. de Supinski and Martin Schulz",
title = "Efficiently exploring architectural design spaces via
predictive modeling",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "5",
pages = "195--206",
month = dec,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Oct 27 06:18:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Kharbutli:2006:CEP,
  author = {Mazen Kharbutli and Xiaowei Jiang and Yan Solihin and
    Guru Venkataramani and Milos Prvulovic},
  title = {Comprehensively and efficiently protecting the heap},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {207--218},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Chilimbi:2006:HIH,
  author = {Trishul M. Chilimbi and Vinod Ganapathy},
  title = {{HeapMD}: identifying heap-based bugs using anomaly
    detection},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {219--228},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Narayanasamy:2006:RSM,
  author = {Satish Narayanasamy and Cristiano Pereira and Brad
    Calder},
  title = {Recording shared memory dependencies using strata},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {229--240},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Patwardhan:2006:DTS,
  author = {Jaidev P. Patwardhan and Vijeta Johri and Chris Dwyer
    and Alvin R. Lebeck},
  title = {A defect tolerant self-organizing nanoscale {SIMD}
    architecture},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {241--251},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Schuchman:2006:PTA,
  author = {Ethan Schuchman and T. N. Vijaykumar},
  title = {A program transformation and architecture support for
    quantum uncomputation},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {252--263},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@article{Mysore:2006:IC,
  author = {Shashidhar Mysore and Banit Agrawal and Navin
    Srivastava and Sheng-Chih Lin and Kaustav Banerjee and
    Tim Sherwood},
  title = {Introspective {$3$D} chips},
  journal = j-COMP-ARCH-NEWS,
  volume = {34},
  number = {5},
  pages = {264--273},
  month = dec,
  year = {2006},
  CODEN = {CANED2},
  ISSN = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L = {0163-5964},
  bibdate = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-URL = {https://dl.acm.org/loi/sigarch},
}
@Article{Cantin:2006:SP,
  author          = {Jason F. Cantin and Mikko H. Lipasti and James E.
                     Smith},
  title           = {Stealth prefetching},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {274--282},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chakraborty:2006:CSE,
  author          = {Koushik Chakraborty and Philip M. Wells and Gurindar
                     S. Sohi},
  title           = {Computation spreading: employing hardware migration to
                     specialize {CMP} cores on-the-fly},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {283--292},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Miller:2006:SBI,
  author          = {Jason E. Miller and Anant Agarwal},
  title           = {Software-based instruction caching for embedded
                     processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {293--302},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Li:2006:MEM,
  author          = {Xin Li and Marian Boldt and Reinhard von Hanxleden},
  title           = {Mapping {Esterel} onto a multi-threaded embedded
                     processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {303--314},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Binkert:2006:INI,
  author          = {Nathan L. Binkert and Ali G. Saidi and Steven K.
                     Reinhardt},
  title           = {Integrated network interfaces for high-bandwidth
                     {TCP\slash IP}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {315--324},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tarditi:2006:AUD,
author = "David Tarditi and Sidd Puri and Jose Oglesby",
title = "{Accelerator}: using data parallelism to program
{GPUs} for general-purpose uses",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "5",
pages = "325--335",
month = dec,
year = "2006",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1168857.1168898",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Oct 27 06:18:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "GPUs are difficult to program for general-purpose
uses. Programmers can either learn graphics APIs and
convert their applications to use graphics pipeline
operations or they can use stream programming
abstractions of GPUs. We describe Accelerator, a system
that uses data parallelism to program GPUs for
general-purpose uses instead. Programmers use a
conventional imperative programming language and a
library that provides only high-level data-parallel
operations. No aspects of GPUs are exposed to
programmers. The library implementation compiles the
data-parallel operations on the fly to optimized GPU
pixel shader code and API calls. We describe the
compilation techniques used to do this. We evaluate the
effectiveness of using data parallelism to program GPUs
by providing results for a set of compute-intensive
benchmarks. We compare the performance of Accelerator
versions of the benchmarks against hand-written pixel
shaders. The speeds of the Accelerator versions are
typically within 50\% of the speeds of hand-written
pixel shader code. Some benchmarks significantly
outperform C versions on a CPU: they are up to 18 times
faster than C code running on a CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Damron:2006:HTM,
  author          = {Peter Damron and Alexandra Fedorova and Yossi Lev},
  title           = {Hybrid transactional memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {336--346},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chuang:2006:UPB,
  author          = {Weihaw Chuang and Satish Narayanasamy and Ganesh
                     Venkatesh and Jack Sampson and Michael {Van Biesbrouck}
                     and Gilles Pokam and Brad Calder and Osvaldo Colavin},
  title           = {Unbounded page-based transactional memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {347--358},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Moravan:2006:SNT,
author = "Michelle J. Moravan and Jayaram Bobba and Kevin E.
Moore and Luke Yen and Mark D. Hill and Ben Liblit and
Michael M. Swift and David A. Wood",
title = "Supporting nested transactional memory in {LogTM}",
journal = j-COMP-ARCH-NEWS,
volume = "34",
number = "5",
pages = "359--370",
month = dec,
year = "2006",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Oct 27 06:18:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chung:2006:TTM,
  author          = {JaeWoong Chung and Chi Cao Minh and Austen McDonald
                     and Travis Skare and Hassan Chafi and Brian D.
                     Carlstrom and Christos Kozyrakis and Kunle Olukotun},
  title           = {Tradeoffs in transactional memory virtualization},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {371--381},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kawahito:2006:NIR,
  author          = {Motohiro Kawahito and Hideaki Komatsu and Takao
                     Moriyama and Hiroshi Inoue and Toshio Nakatani},
  title           = {A new idiom recognition framework for exploiting
                     hardware-assist instructions},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {382--393},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bansal:2006:AGP,
  author          = {Sorav Bansal and Alex Aiken},
  title           = {Automatic generation of peephole superoptimizers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {394--403},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Solar-Lezama:2006:CSF,
  author          = {Armando Solar-Lezama and Liviu Tancau and Rastislav
                     Bodik and Sanjit Seshia and Vijay Saraswat},
  title           = {Combinatorial sketching for finite programs},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {404--415},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{DaSilva:2006:PPA,
  author          = {Jeff {Da Silva} and J. Gregory Steffan},
  title           = {A probabilistic pointer analysis for speculative
                     optimizations},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {34},
  number          = {5},
  pages           = {416--425},
  month           = dec,
  year            = {2006},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Oct 27 06:18:30 MDT 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tullsen:2007:ISI,
  author          = {Dean Tullsen and Rakesh Kumar and Norman P. Jouppi},
  title           = {Introduction to the special issue on the {2006
                     Workshop on Design, Analysis, and Simulation of Chip
                     Multiprocessors: (dasCMP'06)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {2--2},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241605},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Chip multiprocessor architectures are becoming
                     increasingly attractive as an option to provide high
                     instruction throughput while keeping power and
                     complexity under control. Such architectures have also
                     been shown to have scalability and productivity
                     advantages. Multi-core processors are fast becoming
                     mainstream.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Mahesri:2007:HSS,
  author          = {Aqeel Mahesri and Nicholas J. Wang and Sanjay J.
                     Patel},
  title           = {Hardware support for software controlled
                     multithreading},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {3--12},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241606},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Chip multi-processors have emerged as one of the most
                     effective uses of the huge number of transistors
                     available today and in the future, but questions remain
                     as to the best way to leverage CMPs to accelerate
                     single threaded applications. Previous approaches rely
                     on significant speculation to accomplish this goal. Our
                     proposal, NXA, is less speculative than previous
                     proposals, relying heavily on software to guarantee
                     thread correctness, though still allowing parallelism
                     in the presence of ambiguous dependences. It divides a
                     single thread of execution into multiple using the
                     master-worker paradigm where some set of master threads
                     execute code that spawns tasks for other, worker
                     threads. The master threads generally consist of
                     performance critical instructions that can prefetch
                     data, compute critical control decisions, or compute
                     performance critical dataflow slices. This prevents
                     non-critical instructions from competing with critical
                     instructions for processor resources, allowing the
                     critical thread (and thus the workload) to complete
                     faster. Empirical results from performance simulation
                     show a 20\% improvement in performance on a 2-way CMP
                     machine, demonstrating that software controlled
                     multithreading can indeed provide a benefit in the
                     presence of hardware support.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Shi:2007:CCP,
  author          = {Xudong Shi and Feiqi Su and Jih-kwon Peir and Ye Xia
                     and Zhen Yang},
  title           = {{CMP} cache performance projection: accessibility vs.
                     capacity},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {13--20},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241607},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Efficient utilizing on-chip storage space on
                     Chip-Multiprocessors (CMPs) has become an important
                     research topic. Tradeoffs between data accessibility
                     and effective on-chip capacity have been studied
                     extensively. It requires costly simulations to
                     understand a wide-spectrum of the design space. In this
                     paper, we first develop an abstract model for
                     understanding the performance impact with respect to
                     data replication. To overcome the lack of real-time
                     interactions among multiple cores in the abstract
                     model, we propose a global stack simulation strategy to
                     study the performance of a variety of cache
                     organizations on CMPs. The global stack logically
                     incorporates a shared stack and per-core private stacks
                     to collect shared/private reuse (stack) distances for
                     every memory reference in a single simulation pass.
                     With the collected reuse distances, performance in
                     terms of hits/misses and average memory access times
                     can be calculated for various cache organizations. We
                     verify the stack results against individual
                     execution-driven simulations that consider realistic
                     cache parameters and delays using a set of commercial
                     multithreaded workloads. The results show that stack
                     simulations can accurately model the performance of
                     various cache organizations. The single-pass stack
                     simulation results demonstrate that the effectiveness
                     of various techniques for optimizing the CMP on-chip
                     storage is closely related to the working sets of the
                     workloads as well as to the total cache sizes.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {CMP caches; data replication; performance modeling and
                     projection; stack simulation},
  remark          = {{DASCMP'06}},
}
@Article{Guo:2007:CQC,
  author          = {Fei Guo and Hari Kannan and Li Zhao and Ramesh
                     Illikkal and Ravi Iyer and Don Newell and Yan Solihin
                     and Christos Kozyrakis},
  title           = {From chaos to {QoS}: case studies in {CMP} resource
                     management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {21--30},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241608},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {As more and more cores are enabled on the die of
                     future CMP platforms, we expect that several diverse
                     workloads will run simultaneously on the platform. A
                     key example of this trend is the growth of
                     virtualization usage models. When multiple virtual
                     machines or applications or threads run simultaneously,
                     the quality of service (QoS) that the platform provides
                     to each individual thread is non-deterministic today.
                     This occurs because the simultaneously running threads
                     place very different demands on the shared resources
                     (cache space, memory bandwidth, etc) in the platform
                     and in most cases contend with each other. In this
                     paper, we first present case studies that show how this
                     results in non-deterministic performance. Unlike the
                     compute resources managed through scheduling, platform
                     resource allocation to individual threads cannot be
                     controlled today. In order to provide better
                     determinism and QoS, we then examine resource
                     management mechanisms and present QoS-aware
                     architectures and execution environments. The main
                     contribution of this paper is the architecture
                     feasibility analysis through prototypes that allow
                     experimentation with QoS-Aware execution environments
                     and architectural resources. We describe these QoS
                     prototypes and then present preliminary case studies of
                     multi-tasking and virtualization usage models sharing
                     one critical CMP resource (last-level cache). We then
                     demonstrate how proper management of the cache resource
                     can provide service differentiation and deterministic
                     performance behavior when running disparate workloads
                     in future CMP platforms.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Kondo:2007:IFT,
  author          = {Masaaki Kondo and Hiroshi Sasaki and Hiroshi
                     Nakamura},
  title           = {Improving fairness, throughput and energy-efficiency
                     on a chip multiprocessor through {DVFS}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {31--38},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241609},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Recently, a single chip multiprocessor (CMP) is
                     becoming an attractive architecture for improving
                     throughput of program execution. In CMPs, multiple
                     processor cores share several hardware resources such
                     as cache memory and memory bus. Therefore, the resource
                     contention significantly degrades performance of each
                     thread and also loses fairness between threads.\par
                     In this paper, we propose a Dynamic Frequency and
                     Voltage Scaling (DVFS) algorithm for improving total
                     instruction throughput, fairness, and energy efficiency
                     of CMPs. The proposed technique periodically observes
                     the utilization ratio of shared resources and controls
                     the frequency and the voltage of each processor core
                     individually to balance the ratio between threads. We
                     evaluate our technique and the evaluation results show
                     that fairness between threads are greatly improved by
                     the technique. Moreover, the total instruction
                     throughput increases in many cases while reducing
                     energy consumption.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Waliullah:2007:SFC,
author = "M. M. Waliullah and Per Stenstr{\"o}m",
title = "Starvation-free commit arbitration policies for
transactional memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "39--46",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241610",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In transactional memory systems like TCC, unordered
transactions are committed on a first-come, first-serve
basis. If a transaction has read data that has been
modified by the next transaction to commit, it will
have to roll-back and a lot of computations can
potentially be wasted. Even worse, such simple commit
arbitration policies are prone to starvation; in fact,
the performance of Raytrace in SPLASH-2 suffered
significantly because of this.\par
This paper analyzes in detail the design issues for
commit arbitration policies and proposes novel policies
that reduce the amount of wasted computation due to
roll-back and, most importantly, avoid starvation. We
analyze in detail how to incorporate them in a TCC-like
transactional memory protocol. We find that our
proposed schemes have no impact on the common-case
performance. In addition, they add modest complexity to
the baseline protocol.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "{DASCMP'06}",
}
@Article{Ferri:2007:HSF,
  author          = {Cesare Ferri and Tali Moreshet and R. Iris Bahar and
                     Luca Benini and Maurice Herlihy},
  title           = {A hardware\slash software framework for supporting
                     transactional memory in a {MPSoC} environment},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {47--54},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241611},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Manufacturers are focusing on
                     multiprocessor-system-on-a-chip (MPSoC) architectures
                     in order to provide increased concurrency, rather than
                     increased clock speed, for both large-scale as well as
                     embedded systems. Traditionally lock-based
                     synchronization is provided to support concurrency;
                     however, managing locks can be very difficult and error
                     prone. In addition, the performance and power cost of
                     lock-based synchronization can be high. Transactional
                     memories have been extensively investigated as an
                     alternative to lock-based synchronization in
                     general-purpose systems. It has been shown that
                     transactional memory has advantages over locks in terms
                     of ease of programming, performance and energy
                     consumption. However, their applicability to embedded
                     multi-core platforms has not been explored yet. In this
                     paper, we demonstrate a complete hardware transactional
                     memory solution for an embedded multi-core
                     architecture, consisting of a cache-coherent ARM-based
                     cluster, similar to ARM's MPCore. Using cycle accurate
                     power and performance models for the transactional
                     memory hardware, we evaluate our architectural
                     framework over a set of different system and
                     application settings, and show that transactional
                     memory is a promising solution, even for
                     resource-constrained embedded multiprocessors.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Rul:2007:FLP,
  author          = {Sean Rul and Hans Vandierendonck and Koen {De
                     Bosschere}},
  title           = {Function level parallelism driven by data
                     dependencies},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {55--62},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241612},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {With the rise of Chip multiprocessors (CMPs), the
                     amount of parallel computing power will increase
                     significantly in the near future. However, most
                     programs are sequential in nature and have not been
                     explicitly parallelized, so they cannot exploit these
                     parallel resources. Automatic parallelization of
                     sequential, non-regular codes is very hard, as
                     illustrated by the lack of solutions after more than 30
                     years of research on the topic. The question remains if
                     there is parallelism in sequential programs that can be
                     detected automatically and if so, how much parallelism
                     there is.\par
                     In this paper, we propose a framework for extracting
                     potential parallelism from programs. Applying this
                     framework to sequential programs can teach us how much
                     parallelism is present in a program, but also tells us
                     what the most appropriate parallel construct for a
                     program is, e.g. a pipeline, master/slave work
                     distribution, etc.\par
                     Our framework is profile-based, implying that it is not
                     safe. It builds two new graph representations of the
                     profile-data: the interprocedural data flow graph and
                     the data sharing graph. This graphs show the data-flow
                     between functions and the data structures facilitating
                     this data-flow, respectively.\par
                     We apply our framework on the SPECcpu2000 bzip2
                     benchmark, achieving a speedup of 3.74 of the
                     compression part and a global speedup of 2.45 on a quad
                     processor system.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {{DASCMP'06}},
}
@Article{Henning:2007:GEI,
  author          = {John L. Henning},
  title           = {{Guest editor}'s introduction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {63--64},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241614},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {During the development of the new benchmark suite
                     CPU2006, SPEC analyzed benchmark candidates for various
                     technical attributes, including time profiles, language
                     standard compliance, I/O activity, system resource
                     usage, and many other attributes. Many people
                     contributed to the analysis, as shown in the credits at
                     www.spec.org/cpu2006/docs/credits.html. This issue of
                     Computer Architecture News presents a set of articles
                     flowing from that analysis effort.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Henning:2007:SCS,
  author          = {John L. Henning},
  title           = {{SPEC CPU} suite growth: an historical perspective},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {65--68},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241615},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Since 1989, the SPEC CPU benchmarks have aspired to
                     ambitious goals: fair, portable, comparable tests using
                     the compute-intensive portion of real applications. It
                     may be difficult today to remember just how much of a
                     challenge these goals presented when SPEC was first
                     founded, or how much of a break they were from previous
                     industry practice.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Phansalkar:2007:SSC,
  author          = {Aashish Phansalkar and Ajay Joshi and Lizy K. John},
  title           = {Subsetting the {SPEC CPU2006} benchmark suite},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {69--76},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241616},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {On August 24, 2006, the Standard Performance
                     Evaluation Corporation (SPEC) announced CPU2006 -- the
                     next generation of industry-standardized CPU-intensive
                     benchmark suite. The SPEC CPU benchmark suite has
                     become the most frequently used suite for
                     simulation-based computer architecture research.
                     Detailed processor simulators take days to weeks to
                     simulate each of the SPEC CPU programs. In order to
                     reduce simulation to a tractable time, architects and
                     researchers often use only a subset of benchmarks from
                     the SPEC CPU suite to evaluate the potential of their
                     ideas. Prior research has demonstrated that statistical
                     techniques are most effective to find a representative
                     subset of benchmark programs from a benchmark suite.
                     The objective of this paper is to apply multivariate
                     statistical data analysis techniques for selecting a
                     representative subset of programs from the SPEC CPU2006
                     benchmark suite. We measure a set of performance
                     counter based characteristics for the SPEC CPU2006
                     programs across a large number of architectures and
                     apply multivariate statistical analysis techniques to
                     find a representative subset of benchmarks and
                     representative input sets wherever multiple input sets
                     are provided. The results from this paper will help
                     architects and researchers to find a smaller but
                     representative set of programs from the SPEC CPU2006
                     benchmark suite, when time or resource constraints
                     prohibit experimentation with the entire benchmark
                     suite.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Wong:2007:CBS,
  author          = {Michael Wong},
  title           = {{C++} benchmarks in {SPEC CPU2006}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {77--83},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241617},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In SPEC CPU2006, there are three C++ integer
                     benchmarks and four floating-point C++ benchmarks. This
                     paper describes the work of incorporating C++
                     benchmarks into SPEC CPU2006. It describes the base
                     language standard supported and the basis for run rules
                     adopted to maintain an even playing field for different
                     compilers. It also describes issues that complicate
                     porting C++ benchmarks. It describes some of the C++
                     Standard compliance issues that were technically
                     interesting during the benchmark development phase,
                     using as examples the behavior of const-correctness,
                     nested class access of private member of enclosing
                     class, and unneeded template instantiations.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Henning:2007:SCM,
  author          = {John L. Henning},
  title           = {{SPEC CPU2006} memory footprint},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {84--89},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241618},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The nominal goal for memory consumption by SPEC
                     CPU2006 benchmarks is up to about 900 MB when compiled
                     with 32-bit pointers. The 900 MB maximum was chosen so
                     that a system with 1GB will have about 100MB available
                     for the operating system and overhead processes. By
                     comparison, the goal for SPEC CPU2000 was 200MB [1].},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Gove:2007:CWS,
  author          = {Darryl Gove},
  title           = {{CPU2006} working set size},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {1},
  pages           = {90--96},
  month           = mar,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1241601.1241619},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:47:26 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {SPEC CPU2000 had a target memory footprint of 200 MB
                     for the benchmarks [1], to enable the suite to run on
                     machines with 256 MB of memory. Six years have elapsed
                     since the release of that suite, and in that time
                     memory sizes have increased significantly, so the
                     memory requirements for the recently released CPU2006
                     reflect this. CPU2006 has been targeted to have a
                     benchmark memory footprint of about 900MB, allowing the
                     suite to run on machines with 1GB of memory.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Korn:2007:SCS,
author = "Wendy Korn and Moon S. Chang",
title = "{SPEC CPU2006} sensitivity to memory page sizes",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "97--101",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241620",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "SPEC CPU2006 is a compute-intensive industry standard
benchmark suite published in August 2006. This paper
characterizes the memory access behavior of SPEC
CPU2006 running on IBM POWER5+ microprocessors. We
measure the maximum and average memory usage of the
benchmarks to validate SPEC's memory requirement
criteria. This paper also analyzes how different page
sizes affect the performance of the benchmarks. The
experiment reveals that 64 KB and 16 MB pages improve
the performance up to 46.9\% and 50.9\%,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "CPI analysis; large page size; memory usage;
performance optimization; SPEC CPU2006 benchmarks;
workload characterization",
}
@Article{Weicker:2007:SPR,
author = "Reinhold P. Weicker and John L. Henning",
title = "Subroutine profiling results for the {CPU2006}
benchmarks",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "102--111",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241621",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Subroutine profiling is a well-known performance tool.
For application or system programmers, it determines
`hot spots' where the program spends most of its time,
and where careful rewriting can most help performance.
For compiler authors, it can give information about
programming style in such hot spots, and can indicate
where compiler improvements may be useful. For hardware
designers and analysts, it can be the starting point to
explain performance behavior.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ye:2007:CFA,
author = "Dong Ye and Joydeep Ray and David Kaeli",
title = "Characterization of file {I/O} activity for {SPEC
CPU2006}",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "112--117",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241622",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "SPEC CPU2006 is a compute-intensive benchmark suite
designed to stress a computer system's processor,
memory subsystem, and compiler. To construct this
suite, SPEC has selected benchmarks that are derived
from real world applications. When run with their
reference inputs, these programs place a significant
computational burden on today's mainstream desktops as
well as high-end workstations and servers.\par
For these applications to thoroughly exercise the
merits of a particular processor/memory design point,
it is necessary to limit the amount of I/O activity
generated. Since these applications come from real
world applications, the suite developers have
considered how best to limit the amount of file-based
I/O activity present in these applications. This paper
presents the characteristics of file I/O activity in
the resulting suite and its overall impact on the
performance of these applications. We also report on
some of the choices SPEC has made in order to reduce
the file I/O activity in some specific programs of the
suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Henning:2007:PCD,
author = "John L. Henning",
title = "Performance counters and development of {SPEC
CPU2006}",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "118--121",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241623",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Performance counters provide the means to track
detailed events that occur on a CPU chip. These events
are of interest to both performance analysts and
compiler developers. Counting them provides essential
clues to guide performance improvement. For example, a
tester who sees that a program has a high cache miss
rate on a particular system may experiment with
compilation options that improve prefetching. A
compiler developer who sees the same thing may realize
that the code generator's machine model is missing some
crucial detail of behavior on that particular system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gove:2007:ECB,
author = "Darryl Gove and Lawrence Spracklen",
title = "Evaluating the correspondence between training and
reference workloads in {SPEC CPU2006}",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "122--129",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241624",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Profile feedback (sometimes called Feedback Directed
Optimisation FDO) is a useful technique for providing
the compiler with additional information about runtime
program flow. The compiler is able to use this
information to make optimisation decisions that improve
the way the code is laid out in memory or determine
which routines are inlined, and hence improve the
performance of the application.\par
The use of profile feedback requires the code to be
compiled twice. The first time the compiler generates
an instrumented version of the application. This
instrumented version is then run on one or more
`representative' training workloads to gather profile
data. This profile data contains information such as
how many times each routine is executed and how
frequently each branch is taken. The second pass
through the compiler uses this information to make more
enlightened optimisation decisions.\par
The quality of the training data impacts the ability of
the compiler to do the best job that it can. This paper
discusses a method of assessing the similarity of the
training workload to the reference workload, and
applies this methodology to evaluate the training
workloads in the SPEC CPU2006 benchmark suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Spradling:2007:SCB,
author = "Cloyce D. Spradling",
title = "{SPEC CPU2006} benchmark tools",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "130--134",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241625",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The benchmarks that make up the SPEC CPU2006 benchmark
suite are set-up, run, timed, and scored by the CPU
tools harness. The tools have evolved over time from a
collection of edit-it-yourself makefiles, scripts, and
an Excel spreadsheet to the current Perl-based suite.
The basic purpose of the tools is to make life easier
for the benchmarker; they make it easier to tweak
compilation settings, easier to keep track of those
settings, and most importantly, they make it easier to
follow the run and reporting rules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sridhar:2007:HLO,
author = "Swaroop Sridhar and Jonathan S. Shapiro and Prashanth
P. Bungale",
title = "{HDTrans}: a low-overhead dynamic translator",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "135--140",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241602",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Dynamic translation is a general purpose tool used for
instrumenting programs at run time. Many current
translators perform substantial rewriting during
translation in an attempt to reduce execution time.
When dynamic translation is used as a ubiquitous policy
enforcement mechanism, the majority of program
executions have no dominating inner loop that can be
used to amortize the cost of translation. Even under
more favorable usage assumptions, our measurements show
that such optimizations offer no significant benefit in
most cases. A simpler, more maintainable, adaptable,
and smaller translator may be preferable to more
complicated designs.\par
In this paper, we present HDTrans, a light-weight IA-32
to IA-32 binary translation system that uses some
simple and effective translation techniques in
combination with established trace linearization and
code caching optimizations. We also present an
evaluation of translation overhead under non-ideal
conditions, showing that conventional benchmarks do not
provide a good prediction of translation overhead when
used pervasively.\par
A further contribution of this paper is an analysis of
the effectiveness of post-compile static
pre-translation techniques for overhead reduction. Our
results indicate that static pre-translation is
effective only when expensive instrumentation or
optimization is performed, and that efficient reload of
pre-translated code incurs a substantial execution-time
penalty.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yan:2007:HMC,
author = "Jun Yan and Wei Zhang",
title = "Hybrid multi-core architecture for boosting
single-threaded performance",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "141--148",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241603",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The scaling of technology and the diminishing return
of complicated uniprocessors have driven the industry
towards multicore processors. While multithreaded
applications can naturally leverage the enhanced
throughput of multi-core processors, a large number of
important applications are single-threaded, which
cannot automatically harness the potential of
multi-core processors. In this paper, we propose a
compiler-driven heterogeneous multicore architecture,
consisting of tightly-integrated VLIW (Very Long
Instruction Word) and superscalar processors on a
single chip, to automatically boost the performance of
single-threaded applications without compromising the
capability to support multithreaded programs. In the
proposed multi-core architecture, while the
high-performance VLIW core is used to run code segments
with high instruction-level parallelism (ILP) extracted
by the compiler; the superscalar core can be exploited
to deal with the runtime events that are typically
difficult for the VLIW core to handle, such as L2 cache
misses. Our initial experimental results by running the
preexecution thread on the superscalar core to mitigate
the L2 cache misses of the main thread on the VLIW core
indicate that the proposed VLIW/superscalar multi-core
processor can automatically improve the performance of
single-threaded general-purpose applications by up to
40.8\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2007:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "1",
pages = "149--154",
month = mar,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1241601.1241627",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:47:26 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This column consists of selected traffic from the
comp.arch newsgroup, a forum for discussion of computer
architecture on the Internet---an international
computer network.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shaw:2007:ASP,
author = "David E. Shaw and Martin M. Deneroff and Ron O. Dror
and Jeffrey S. Kuskin and Richard H. Larson and John K.
Salmon and Cliff Young and Brannon Batson and Kevin J.
Bowers and Jack C. Chao and Michael P. Eastwood and
Joseph Gagliardo and J. P. Grossman and C. Richard Ho
and Douglas J. Ierardi and Istv{\'a}n Kolossv{\'a}ry
and John L. Klepeis and Timothy Layman and Christine
McLeavey and Mark A. Moraes and Rolf Mueller and Edward
C. Priest and Yibing Shan and Jochen Spengler and
Michael Theobald and Brian Towles and Stanley C. Wang",
title = "{Anton}, a special-purpose machine for molecular
dynamics simulation",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "1--12",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250664",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The ability to perform long, accurate molecular
dynamics (MD) simulations involving proteins and other
biological macro-molecules could in principle provide
answers to some of the most important currently
outstanding questions in the fields of biology,
chemistry and medicine. A wide range of biologically
interesting phenomena, however, occur over time scales
on the order of a millisecond---about three orders of
magnitude beyond the duration of the longest current MD
simulations.\par
In this paper, we describe a massively parallel machine
called Anton, which should be capable of executing
millisecond-scale classical MD simulations of such
biomolecular systems. The machine, which is scheduled
for completion by the end of 2008, is based on 512
identical MD-specific ASICs that interact in a tightly
coupled manner using a specialized high-speed
communication network. Anton has been designed to use
both novel parallel algorithms and special-purpose
logic to dramatically accelerate those calculations
that dominate the time required for a typical MD
simulation. The remainder of the simulation algorithm
is executed by a programmable portion of each chip that
achieves a substantial degree of parallelism while
preserving the flexibility necessary to accommodate
anticipated advances in physical models and simulation
methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bioinformatics; biomolecular system simulation;
computational biology; computational drug design;
molecular dynamics; protein folding; protein structure;
special-purpose machine",
}
@Article{Fan:2007:PPW,
author = "Xiaobo Fan and Wolf-Dietrich Weber and Luiz Andre
Barroso",
title = "Power provisioning for a warehouse-sized computer",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "13--23",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250665",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Large-scale Internet services require a computing
infrastructure that can be appropriately described as a
warehouse-sized computing system. The cost of building
datacenter facilities capable of delivering a given
power capacity to such a computer can rival the
recurring energy consumption costs themselves.
Therefore, there are strong economic incentives to
operate facilities as close as possible to maximum
capacity, so that the non-recurring facility costs can
be best amortized. That is difficult to achieve in
practice because of uncertainties in equipment power
ratings and because power consumption tends to vary
significantly with the actual computing activity.
Effective power provisioning strategies are needed to
determine how much computing equipment can be safely
and efficiently hosted within a given power
budget.\par
In this paper we present the aggregate power usage
characteristics of large collections of servers (up to
15 thousand) for different classes of applications over
a period of approximately six months. Those
observations allow us to evaluate opportunities for
maximizing the use of the deployed power capacity of
datacenters, and assess the risks of over-subscribing
it. We find that even in well-tuned applications there
is a noticeable gap (7--16\%) between achieved and
theoretical aggregate peak power usage at the cluster
level (thousands of servers). The gap grows to almost
40\% in whole datacenters. This headroom can be used to
deploy additional compute equipment within the same
power budget with minimal risk of exceeding it. We use
our modeling framework to estimate the potential of
power management schemes to reduce peak power and
energy usage. We find that the opportunities for power
and energy savings are significant, but greater at the
cluster-level (thousands of servers) than at the
rack-level (tens). Finally we argue that systems need
to be power efficient across the activity range, and
not only at peak performance levels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "energy efficiency; power modeling; power
provisioning",
}
@Article{Blundell:2007:MFC,
author = "Colin Blundell and Joe Devietti and E. Christopher
Lewis and Milo M. K. Martin",
title = "Making the fast case common and the uncommon case
simple in unbounded transactional memory",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "24--34",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250667",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware transactional memory has great potential to
simplify the creation of correct and efficient
multithreaded programs, allowing programmers to exploit
more effectively the soon-to-be-ubiquitous multi-core
designs. Several recent proposals have extended the
original bounded transactional memory to unbounded
transactional memory, a crucial step toward
transactions becoming a general-purpose primitive.
Unfortunately, supporting the concurrent execution of
an unbounded number of unbounded transactions is
challenging, and as a result, many proposed
implementations are complex.\par
This paper explores a different approach. First, we
introduce the permissions-only cache to extend the
bound at which transactions overflow to allow the fast,
bounded case to be used as frequently as possible.
Second, we propose OneTM to simplify the implementation
of unbounded transactional memory by bounding the
concurrency of transactions that overflow the cache.
These mechanisms work synergistically to provide a
simple and fast unbounded transactional memory
system.\par
The permissions-only cache efficiently maintains the
coherence permissions---but not data---for blocks read
or written transactionally that have been evicted from
the processor's caches. By holding coherence
permissions for these blocks, the regular cache
coherence protocol can be used to detect transactional
conflicts using only a few bits of on-chip storage per
overflowed cache block. OneTM allows only one
overflowed transaction at a time, relying on the
permissions-only cache to ensure that overflow is
infrequent. We present two implementations. In
OneTM-Serialized, an overflowed transaction simply
stalls all other threads in the application.\par
In OneTM-Concurrent, non-overflowed transactions and
non-transactional code can execute concurrently with
the overflowed transaction, providing more concurrency
while retaining OneTM's core simplifying assumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "concurrency; parallel programming; transactional
memory; transactions",
}
@Article{Zhu:2007:SSB,
author = "Weirong Zhu and Vugranam C. Sreedhar and Ziang Hu and
Guang R. Gao",
title = "Synchronization state buffer: supporting efficient
fine-grain synchronization on many-core architectures",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "35--45",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250668",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Efficient fine-grain synchronization is extremely
important to effectively harness the computational
power of many-core architectures. However, designing
and implementing fine-grain synchronization in such
architectures presents several challenges, including
issues of synchronization induced overhead, storage
cost, scalability, and the level of granularity to
which synchronization is applicable. This paper
proposes the Synchronization State Buffer (SSB), a
scalable architectural design for fine-grain
synchronization that efficiently performs
synchronizations between concurrent threads. The design
of SSB is motivated by the following observation: at
any instance during the parallel execution only a small
fraction of memory locations are actively participating
in synchronization. Based on this observation we
present a fine-grain synchronization design that
records and manages the states of frequently
synchronized data using modest hardware support. We
have implemented the SSB design in the context of the
160-core IBM Cyclops-64 architecture. Using detailed
simulation, we present our experience for a set of
benchmarks with different workload characteristics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "fine-grain synchronization; many-core; SSB",
}
@Article{Marty:2007:VHS,
author = "Michael R. Marty and Mark D. Hill",
title = "Virtual hierarchies to support server consolidation",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "46--56",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250670",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Server consolidation is becoming an increasingly
popular technique to manage and utilize systems. This
paper develops CMP memory systems for server
consolidation where most sharing occurs within Virtual
Machines (VMs). Our memory systems maximize shared
memory accesses serviced within a VM, minimize
interference among separate VMs, facilitate dynamic
reassignment of VMs to processors and memory, and
support content-based page sharing among VMs. We begin
with a tiled architecture where each of 64 tiles
contains a processor, private L1 caches, and an L2
bank. First, we reveal why single-level directory
designs fail to meet workload consolidation goals.
Second, we develop the paper's central idea of imposing
a two-level virtual (or logical) coherence hierarchy on
a physically flat CMP that harmonizes with VM
assignment. Third, we show that the best of our two
virtual hierarchy (VH) variants performs 12--58\% better
than the best alternative flat directory protocol when
consolidating Apache, OLTP, and Zeus commercial workloads
on our simulated 64-core CMP.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache coherence; chip multiprocessors (CMPs); memory
hierarchies; multicore; partitioning; server
consolidation; virtual machines",
}
@Article{Nesbit:2007:VPC,
author = "Kyle J. Nesbit and James Laudon and James E. Smith",
title = "Virtual private caches",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "57--68",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250671",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Virtual Private Machines (VPM) provide a framework for
Quality of Service (QoS) in CMP-based computer systems.
VPMs incorporate microarchitecture mechanisms that
allow shares of hardware resources to be allocated to
executing threads, thus providing applications with an
upper bound on execution time regardless of other
thread activity. Virtual Private Caches (VPCs) are an
important element of VPMs. VPC hardware consists of two
major components: the VPC Arbiter, which manages shared
cache bandwidth, and the VPC Capacity Manager, which
manages the cache storage. Both the VPC Arbiter and VPC
Capacity Manager provide minimum service guarantees
that, when combined, achieve QoS for the cache
subsystem. Simulation-based evaluation shows that
conventional cache bandwidth management policies allow
concurrently executing threads to affect each other
significantly in an uncontrollable manner. The
evaluation targets cache bandwidth because the effects
of cache capacity sharing have been studied elsewhere.
In contrast with the conventional policies, the VPC
Arbiter meets its QoS performance objectives on all
workloads studied and over a range of allocated
bandwidth levels. The VPC Arbiter's fairness policy,
which distributes leftover bandwidth, mitigates the
effects of cache preemption latencies, thus ensuring
threads a high-degree of performance isolation.
Furthermore, the VPC Arbiter eliminates negative
bandwidth interference which can improve aggregate
throughput and resource utilization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessor; performance isolation; quality of
service; shared caches; soft real-time",
}
@Article{Minh:2007:EHT,
author = "Chi Cao Minh and Martin Trautmann and JaeWoong Chung
and Austen McDonald and Nathan Bronson and Jared Casper
and Christos Kozyrakis and Kunle Olukotun",
title = "An effective hybrid transactional memory system with
strong isolation guarantees",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "69--80",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250673",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose signature-accelerated transactional memory
(SigTM), a hybrid TM system that reduces the overhead
of software transactions. SigTM uses hardware
signatures to track the read-set and write-set for
pending transactions and perform conflict detection
between concurrent threads. All other transactional
functionality, including data versioning, is
implemented in software. Unlike previously proposed
hybrid TM systems, SigTM requires no modifications to
the hardware caches, which reduces hardware cost and
simplifies support for nested transactions and
multithreaded processor cores. SigTM is also the first
hybrid TM system to provide strong isolation guarantees
between transactional blocks and non-transactional
accesses without additional read and write barriers in
non-transactional code.\par
Using a set of parallel programs that make frequent use
of coarse-grain transactions, we show that SigTM
accelerates software transactions by 30\% to 280\%. For
certain workloads, SigTM can match the performance of a
full-featured hardware TM system, while for workloads
with large read-sets it can be up to two times slower.
Overall, we show that SigTM combines the performance
characteristics and strong isolation guarantees of
hardware TM implementations with the low cost and
flexibility of software TM systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "multi-core architectures; parallel programming; strong
isolation; transactional memory",
}
@Article{Bobba:2007:PPH,
author = "Jayaram Bobba and Kevin E. Moore and Haris Volos and
Luke Yen and Mark D. Hill and Michael M. Swift and
David A. Wood",
title = "Performance pathologies in hardware transactional
memory",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "81--91",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250674",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware Transactional Memory (HTM) systems reflect
choices from three key design dimensions: conflict
detection, version management, and conflict resolution.
Previously proposed HTMs represent three points in this
design space: lazy conflict detection, lazy version
management, committer wins (LL); eager conflict
detection, lazy version management, requester wins
(EL); and eager conflict detection, eager version
management, and requester stalls with conservative
deadlock avoidance (EE). To isolate the effects of
these high-level design decisions, we develop a common
framework that abstracts away differences in cache
write policies, interconnects, and ISA to compare these
three design points. Not surprisingly, the relative
performance of these systems depends on the workload.
Under light transactional loads they perform similarly,
but under heavy loads they differ by up to 80\%. None
of the systems performs best on all of our benchmarks.
We identify seven performance pathologies---interactions
between workload and system that degrade performance---as
the root cause of many performance differences:
FriendlyFire, StarvingWriter, SerializedCommit,
FutileStall, StarvingElder, RestartConvoy, and
DuelingUpgrades. We discuss when and on which systems
these pathologies can occur and show that they actually
manifest within TM workloads. The insight provided by
these pathologies motivated four enhanced systems that
often significantly reduce transactional memory
overhead. Importantly, by avoiding transaction
pathologies, each enhanced system performs well across
our suite of benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "contention management; hardware; pathology;
performance; transactional memory",
}
@Article{Ramadan:2007:MTT,
author = "Hany E. Ramadan and Christopher J. Rossbach and Donald
E. Porter and Owen S. Hofmann and Aditya Bhandari and
Emmett Witchel",
title = "{MetaTM\slash TxLinux}: transactional memory for an
operating system",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "92--103",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250675",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper quantifies the effect of architectural
design decisions on the performance of TxLinux. TxLinux
is a Linux kernel modified to use transactions in place
of locking primitives in several key subsystems. We run
TxLinux on MetaTM, which is a new hardware-transaction
memory (HTM) model. MetaTM contains features that
enable efficient and correct interrupt handling for an
x86-like architecture. Live stack overwrites can
corrupt non-transactional stack memory and requires a
small change to the transaction register checkpoint
hardware to ensure correct operation of the operating
system. We also propose stack based early release to
reduce spurious conflicts on stack memory between
kernel code and interrupt handlers. We use MetaTM to
examine the performance sensitivity of individual
architectural features. For TxLinux we find that Polka
and SizeMatters are effective contention management
policies, some form of backoff on transaction
contention is vital for performance, and stalling on a
transaction conflict reduces transaction restart rates,
but does not improve performance. Transaction write
sets are small, and performance is insensitive to
transaction abort costs but sensitive to commit
costs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "MetaTM; OS support; transactional memory; TxLinux",
}
@Article{Shriraman:2007:IHS,
author = "Arrvindh Shriraman and Michael F. Spear and Hemayet
Hossain and Virendra J. Marathe and Sandhya Dwarkadas
and Michael L. Scott",
title = "An integrated hardware-software approach to flexible
transactional memory",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "104--115",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250676",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "There has been considerable recent interest in both
hardware and software transactional memory (TM). We
present an intermediate approach, in which hardware
serves to accelerate a TM implementation controlled
fundamentally by software. Specifically, we describe an
alert on update mechanism (AOU) that allows a thread to
receive fast, asynchronous notification when
previously-identified lines are written by other
threads, and a programmable data isolation mechanism
(PDI) that allows a thread to hide its speculative
writes from other threads, ignoring conflicts, until
software decides to make them visible. These mechanisms
reduce bookkeeping, validation, and copying overheads
without constraining software policy on a host of
design decisions.\par
We have used AOU and PDI to implement a
hardware-accelerated-software transactional memory
system we call RTM. We have also used AOU alone to
create a simpler `RTM-Lite'. Across a range of
microbenchmarks, RTM outperforms RSTM, a publicly
available software transactional memory system, by as
much as 8.7x (geometric mean of 3.5x) in single-thread
mode. At 16 threads, it outperforms RSTM by as much as
5x, with an average speedup of 2x. Performance degrades
gracefully when transactions overflow hardware
structures. RTM-Lite is slightly faster than RTM for
transactions that modify only small objects; full RTM
is significantly faster when objects are large. In a
strong argument for policy flexibility, we find that
the choice between eager (first-access) and lazy
(commit-time) conflict detection can lead to
significant performance differences in both directions,
depending on application characteristics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache coherence; multiprocessors; RSTM; transactional
memory",
}
@Article{Abad:2007:RRE,
author = "Pablo Abad and Valentin Puente and Jos{\'e} Angel
Gregorio and Pablo Prieto",
title = "Rotary router: an efficient architecture for {CMP}
interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "116--125",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250678",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The trend towards increasing the number of processor
cores and cache capacity in future Chip-Multiprocessors
(CMPs), will require scalable packet-switched
interconnection networks adapted to the restrictions
imposed by the CMP environment. This paper presents an
innovative router design, which successfully addresses
CMP cost/performance constraints. The router structure
is based on two independent rings, which force packets
to circulate either clockwise or anti-clockwise,
traveling through every port of the router. It uses a
completely decentralized scheduling scheme, which
allows the design to: (1) take advantage of wide links,
(2) reduce Head of Line blocking, (3) use adaptive
routing, (4) be topology agnostic, (5) scale with
network degree, and (6) have reasonable power
consumption and implementation cost. A thorough
comparative performance analysis against competitive
conventional routers shows an advantage for our
proposal of up to 50\% in terms of raw performance and
nearly 60\% in terms of energy-delay product.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multi-processors; interconnection networks;
router architecture",
}
@Article{Kim:2007:FBC,
author = "John Kim and William J. Dally and Dennis Abts",
title = "Flattened butterfly: a cost-efficient topology for
high-radix networks",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "126--137",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250679",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Increasing integrated-circuit pin bandwidth has
motivated a corresponding increase in the degree or
radix of interconnection networks and their routers.
This paper introduces the flattened butterfly, a
cost-efficient topology for high-radix networks. On
benign (load-balanced) traffic, the flattened butterfly
approaches the cost/performance of a butterfly network
and has roughly half the cost of a comparable
performance Clos network. The advantage over the Clos
is achieved by eliminating redundant hops when they are
not needed for load balance. On adversarial traffic,
the flattened butterfly matches the cost/performance of
a folded-Clos network and provides an order of
magnitude better performance than a conventional
butterfly. In this case, global adaptive routing is
used to switch the flattened butterfly from minimal to
non-minimal routing --- using redundant hops only when
they are needed. Minimal and non-minimal, oblivious and
adaptive routing algorithms are evaluated on the
flattened butterfly. We show that load-balancing
adversarial traffic requires nonminimal
globally-adaptive routing and show that sequential
allocators are required to avoid transient load
imbalance when using adaptive routing algorithms. We
also compare the cost of the flattened butterfly to
folded-Clos, hypercube, and butterfly networks with
identical capacity and show that the flattened
butterfly is more cost-efficient than folded-Clos and
hypercube topologies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cost model; flattened butterfly; global adaptive
routing; high-radix routers; interconnection networks;
topology",
}
@Article{Kim:2007:NDD,
author = "Jongman Kim and Chrysostomos Nicopoulos and Dongkook
Park and Reetuparna Das and Yuan Xie and Vijaykrishnan
Narayanan and Mazin S. Yousif and Chita R. Das",
title = "A novel dimensionally-decomposed router for on-chip
communication in {$3$D} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "138--149",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250680",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Much like multi-story buildings in densely packed
metropolises, three-dimensional (3D) chip structures
are envisioned as a viable solution to skyrocketing
transistor densities and burgeoning die sizes in
multi-core architectures. Partitioning a larger die
into smaller segments and then stacking them in a 3D
fashion can significantly reduce latency and energy
consumption. Such benefits emanate from the notion that
inter-wafer distances are negligible compared to
intra-wafer distances. This attribute substantially
reduces global wiring length in 3D chips. The work in
this paper integrates the increasingly popular idea of
packet-based Networks-on-Chip (NoC) into a 3D setting.
While NoCs have been studied extensively in the 2D
realm, the microarchitectural ramifications of moving
into the third dimension have yet to be fully explored.
This paper presents a detailed exploration of
inter-strata communication architectures in 3D NoCs.
Three design options are investigated; a simple
bus-based inter-wafer connection, a hop-by-hop standard
3D design, and a full 3D crossbar implementation. In
this context, we propose a novel partially-connected 3D
crossbar structure, called the 3D
Dimensionally-Decomposed (DimDe) Router, which provides
a good tradeoff between circuit complexity and
performance benefits. Simulation results using (a) a
stand-alone cycle-accurate 3D NoC simulator running
synthetic workloads, and (b) a hybrid 3D NoC/cache
simulation environment running real commercial and
scientific benchmarks, indicate that the proposed DimDe
design provides latency and throughput improvements of
over 20\% on average over the other 3D architectures,
while remaining within 5\% of the full 3D crossbar
performance. Furthermore, based on synthesized hardware
implementations in 90 nm technology, the DimDe
architecture outperforms all other designs -- including
the full 3D crossbar -- by an average of 26\% in terms
of the Energy-Delay Product (EDP).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "3D architecture; 3D integration; network-on-chip
(NoC)",
}
@Article{Kumar:2007:EVC,
author = "Amit Kumar and Li-Shiuan Peh and Partha Kundu and
Niraj K. Jha",
title = "Express virtual channels: towards the ideal
interconnection fabric",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "150--161",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250681",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Due to wire delay scalability and bandwidth
limitations inherent in shared buses and dedicated
links, packet-switched on-chip interconnection networks
are fast emerging as the pervasive communication fabric
to connect different processing elements in many-core
chips. However, current state-of-the-art
packet-switched networks rely on complex routers which
increases the communication overhead and energy
consumption as compared to the ideal interconnection
fabric.\par
In this paper, we try to close the gap between the
state-of-the-art packet-switched network and the ideal
interconnect by proposing express virtual channels
(EVCs), a novel flow control mechanism which allows
packets to virtually bypass intermediate routers along
their path in a completely non-speculative fashion,
thereby lowering the energy/delay towards that of a
dedicated wire while simultaneously approaching ideal
throughput with a practical design suitable for on-chip
networks.\par
Our evaluation results using a detailed cycle-accurate
simulator on a range of synthetic traffic and SPLASH
benchmark traces show up to 84\% reduction in packet
latency and up to 23\% improvement in throughput while
reducing the average router energy consumption by up to
38\% over an existing state-of-the-art packet-switched
design. When compared to the ideal interconnect, EVCs
add just two cycles to the no-load latency, and are
within 14\% of the ideal throughput. Moreover, we show
that the proposed design incurs a minimal hardware
overhead while exhibiting excellent scalability with
increasing network sizes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "flow-control; packet-switching; router design",
}
@Article{Kumar:2007:CAS,
author = "Sanjeev Kumar and Christopher J. Hughes and Anthony
Nguyen",
title = "{Carbon}: architectural support for fine-grained
parallelism on chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "162--173",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250683",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Chip multiprocessors (CMPs) are now commonplace, and
the number of cores on a CMP is likely to grow
steadily. However, in order to harness the additional
compute resources of a CMP, applications must expose
their thread-level parallelism to the hardware. One
common approach to doing this is to decompose a program
into parallel `tasks' and allow an underlying software
layer to schedule these tasks to different threads.
Software task scheduling can provide good parallel
performance as long as tasks are large compared to the
software overheads.\par
We examine a set of applications from an important
emerging domain: Recognition, Mining, and Synthesis
(RMS). Many RMS applications are compute-intensive and
have abundant thread-level parallelism, and are
therefore good targets for running on a CMP. However, a
significant number have small tasks for which software
task schedulers achieve only limited parallel
speedups.\par
We propose Carbon, a hardware technique to accelerate
dynamic task scheduling on scalable CMPs. Carbon has
relatively simple hardware, most of which can be placed
far from the cores. We compare Carbon to some highly
tuned software task schedulers for a set of RMS
benchmarks with small tasks. Carbon delivers
significant performance improvements over the best
software scheduler: on average for 64 cores, 68\%
faster on a set of loop-parallel benchmarks, and 109\%
faster on a set of task-parallel benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architectural support; CMP; loop and task
parallelism",
}
@Article{Neelakantam:2007:HAR,
author = "Naveen Neelakantam and Ravi Rajwar and Suresh Srinivas
and Uma Srinivasan and Craig Zilles",
title = "Hardware atomicity for reliable software speculation",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "174--185",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250684",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Speculative compiler optimizations are effective in
improving both single-thread performance and reducing
power consumption, but their implementation introduces
significant complexity, which can limit their adoption,
limit their optimization scope, and negatively impact
the reliability of the compilers that implement them.
To eliminate much of this complexity, as well as
increase the effectiveness of these optimizations, we
propose that microprocessors provide
architecturally-visible hardware primitives for atomic
execution. These primitives provide to the compiler the
ability to optimize the program's hot path in
isolation, allowing the use of non-speculative
formulations of optimization passes to perform
speculative optimizations. Atomic execution guarantees
that if a speculation invariant does not hold, the
speculative updates are discarded, the register state
is restored, and control is transferred to a
non-speculative version of the code, thereby relieving
the compiler from the responsibility of generating
compensation code.\par
We demonstrate the benefit of hardware atomicity in the
context of a Java virtual machine. We find
incorporating the notion of atomic regions into an
existing compiler intermediate representation to be
natural, requiring roughly 3,000 lines of code ($\sim$3\% of
a JVM's optimizing compiler), most of which were for
region formation. Its incorporation creates new
opportunities for existing optimization passes, as well
as greatly simplifying the implementation of additional
optimizations (e.g., partial inlining, partial loop
unrolling, and speculative lock elision). These
optimizations reduce dynamic instruction count by 11\%
on average and result in a 10--15\% average speedup,
relative to a baseline compiler with a similar degree
of inlining.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "atomicity; checkpoint; isolation; Java; optimization;
speculation",
}
@Article{Ipek:2007:CFA,
author = "Engin Ipek and Meyrem Kirman and Nevin Kirman and Jose
F. Martinez",
title = "Core fusion: accommodating software diversity in chip
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "186--197",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250686",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents core fusion, a reconfigurable chip
multiprocessor (CMP) architecture where groups of
fundamentally independent cores can dynamically morph
into a larger CPU, or they can be used as distinct
processing elements, as needed at run time by
applications. Core fusion gracefully accommodates
software diversity and incremental parallelization in
CMPs. It provides a single execution model across all
configurations, requires no additional programming
effort or specialized compiler support, maintains ISA
compatibility, and leverages mature micro-architecture
technology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; reconfigurable architectures;
software diversity",
}
@Article{Chi:2007:TQA,
author = "Eric Chi and Stephen A. Lyon and Margaret Martonosi",
title = "Tailoring quantum architectures to implementation
style: a quantum computer for mobile and persistent
qubits",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "198--209",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250687",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In recent years, quantum computing (QC) research has
moved from the realm of theoretical physics and
mathematics into real implementations. With many
different potential hardware implementations, quantum
computer architecture is a rich field with an
opportunity to solve interesting new problems and to
revisit old ones. This paper presents a QC architecture
tailored to physical implementations with highly mobile
and persistent quantum bits (qubits). Implementations
with qubit coherency times that are much longer than
operation times and qubit transportation times that are
orders of magnitude faster than operation times lend
greater flexibility to the architecture. This is
particularly true in the placement and locality of
individual qubits. For concreteness, we assume a
physical device model based on electron-spin qubits on
liquid helium (eSHe).\par
Like many conventional computer architectures, QCs
focus on the efficient exposure of parallelism. We
present here a QC microarchitecture that enjoys
increasing computational parallelism with size and
latency scaling only linearly with the number of
operations. Although an efficient and high level of
parallelism is admirable, quantum hardware is still
expensive and difficult to build, so we demonstrate how
the software may be optimized to reduce an
application's hardware requirements by 25\% with no
performance loss. Because the majority of a QC's time
and resources are devoted to quantum error correction,
we also present noise modeling results that evaluate
error correction procedures. These results demonstrate
that idle qubits in memory need only be refreshed
approximately once every one hundred operation
cycles.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architecture; quantum",
}
@Article{Yang:2007:BSP,
author = "Xuejun Yang and Xiaobo Yan and Zuocheng Xing and Yu
Deng and Jiang Jiang and Ying Zhang",
title = "A 64-bit stream processor architecture for scientific
applications",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "210--219",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250689",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Stream architecture is a novel microprocessor
architecture with wide application potential. But as
for whether it can be used efficiently in scientific
computing, many issues await further study. This paper
first gives the design and implementation of a 64-bit
stream processor, FT64 (Fei Teng 64), for scientific
computing. The carrying out of 64-bit extension design
and scientific computing oriented optimization are
described in such aspects as instruction set
architecture, stream controller, micro controller, ALU
cluster, memory hierarchy and interconnection interface
here. Second, two kinds of communications as message
passing and stream communications are put forward. An
interconnection based on the communications is designed
for FT64-based high performance computers. Third, a
novel stream programming language, SF95 (Stream
FORTRAN95), and its compiler, SF95Compiler (Stream
FORTRAN95 Compiler), are developed to facilitate the
development of scientific applications. Finally, nine
typical scientific application kernels are tested and
the results show the efficiency of stream architecture
for scientific computing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architecture; compiler; high performance computing;
program language; scientific application; stream
processor",
}
@Article{Hughes:2007:PSA,
author = "Christopher J. Hughes and Radek Grzeszczuk and
Eftychios Sifakis and Daehyun Kim and Sanjeev Kumar and
Andrew P. Selle and Jatin Chhugani and Matthew Holliman
and Yen-Kuang Chen",
title = "Physical simulation for animation and visual effects:
parallelization and characterization for chip
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "220--231",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250690",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We explore the emerging application area of
physics-based simulation for computer animation and
visual special effects. In particular, we examine its
parallelization potential and characterize its behavior
on a chip multiprocessor (CMP). Applications in this
domain model and simulate natural phenomena, and often
direct visual components of motion pictures. We study a
set of three workloads that exemplify the span and
complexity of physical simulation applications used in
a production environment: fluid dynamics, facial
animation, and cloth simulation. They are
computationally demanding, requiring from a few seconds
to several minutes to simulate a single frame;
therefore, they can benefit greatly from the
acceleration possible with large scale
CMPs.\par
Starting with serial versions of these applications, we
parallelize code accounting for at least 96\% of the
serial execution time, targeting a large number of
threads. We then study the most expensive modules using
a simulated 64-core CMP.\par
For the code representing key modules, we achieve
parallel scaling of 45x, 50x, and 30x for fluid, face,
and cloth simulations, respectively. The modules have a
spectrum of parallel task granularity and locking
behavior, and all but one are dominated by loop-level
parallelism. Many modules operate on streams of data.
In some cases, modules iterate over their data, leading
to significant temporal locality. This streaming
behavior leads to very high on-die and main memory
bandwidth requirements. Finally, most modules have
little inter-thread communication since they are
data-parallel, but a few require heavy communication
between data-parallel operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "characterization; CMP; parallelization; physical
simulation",
}
@Article{Yeh:2007:PAR,
author = "Thomas Y. Yeh and Petros Faloutsos and Sanjay J. Patel
and Glenn Reinman",
title = "{ParallAX}: an architecture for real-time physics",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "232--243",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250691",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Future interactive entertainment applications will
feature the physical simulation of thousands of
interacting objects using explosions, breakable
objects, and cloth effects. While these applications
require a tremendous amount of performance to satisfy
the minimum frame rate of 30 FPS, there is a dramatic
amount of parallelism in future physics workloads. How
will future physics architectures leverage parallelism
to achieve the real-time constraint?\par
We propose and characterize a set of forward-looking
benchmarks to represent future physics load and explore
the design space of future physics processors. In
response to the demand of this workload, we demonstrate
an architecture with a set of powerful cores and caches
to provide performance for the serial and coarse-grain
parallel components of physics simulation, along with a
flexible set of simple cores to exploit fine-grain
parallelism. Our architecture combines intelligent,
application-aware L2 management with dynamic
coupling\slash allocation of simple cores to complex
cores. Furthermore, we perform sensitivity analysis on
interconnect alternatives to determine how tightly to
couple these cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "application specific processor; chip multiprocessor;
interactive entertainment; physics based animation;
real-time physics; stream processing",
}
@Article{Kim:2007:AIB,
author = "Martha Mercaldi Kim and Mojtaba Mehrara and Mark Oskin
and Todd Austin",
title = "Architectural implications of brick and mortar silicon
manufacturing",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "244--253",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250693",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We introduce a novel chip fabrication technique called
`brick and mortar', in which chips are made from small,
pre-fabricated ASIC bricks and bonded in a
designer-specified arrangement to an inter-brick
communication backbone chip. The goal of brick and
mortar assembly is to provide a low-overhead method to
produce custom chips, yet with performance that tracks
an ASIC more closely than an FPGA. This paper examines
the architectural design choices in this chip-design
system. These choices include the definition of
reasonable bricks, both in functionality and size, as
well as the communication interconnect that the I/O cap
provides. To do this we synthesize candidate bricks,
analyze their area and bandwidth demands, and present
an architectural design for the inter-brick
communication network. We discuss a sample chip design,
a 16-way CMP, and analyze the costs and benefits of
designing chips with brick and mortar. We find that
this method of producing chips incurs only a small
performance loss (8\%) compared to a fully custom ASIC,
which is significantly less than the degradation seen
from other low-overhead chip options, such as FPGAs.
Finally, we measure the effect that architectural
design decisions have on the behavior of the proposed
physical brick assembly technique, fluidic
self-assembly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip assembly; design re-use; interconnect design",
}
@Article{Amin:2007:APA,
author = "Ahmed M. Amin and Mithuna Thottethodi and T. N.
Vijaykumar and Steven Wereley and Stephen C. Jacobson",
title = "{Aquacore}: a programmable architecture for
microfluidics",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "254--265",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250694",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Advances in microfluidic research has enabled
lab-on-a-chip (LoC) technology to achieve
miniaturization and integration of biological and
chemical analyses to a single chip comprising channels,
valves, mixers, heaters, separators, and sensors. These
miniature instruments appear to offer the rare
combination of faster, cheaper, and higher-precision
analyses in comparison to conventional bench-scale
methods. LoCs have been applied to diverse domains such
as proteomics, genomics, biochemistry, virology, cell
biology, and chemical synthesis. However, to date LoCs
have been designed as application-specific chips which
incurs significant design effort, turn-around time, and
cost, and degrades designer and user productivity. To
address these limitations, we envision a programmable
LoC (PLoC) and propose a comprehensive fluidic
instruction set, called AquaCore Instruction Set (AIS),
and a fluidic microarchitecture, called AquaCore, to
implement AIS. We present four key design aspects in
which the AIS and AquaCore differ from their computer
counterparts, and our design decisions made on the
basis of the implications of these differences. We
demonstrate the use of the PLoC in a range of domains
by hand-compiling real-world microfluidic assays in
AIS, and show a detailed breakdown of the execution
times for the assays and an estimate of the chip
area.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "fluidic; fluidic microarchitecture; instruction set;
microfluidics; programmable lab on a chip",
}
@Article{Wenisch:2007:MSW,
author = "Thomas F. Wenisch and Anastasia Ailamaki and Babak
Falsafi and Andreas Moshovos",
title = "Mechanisms for store-wait-free multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "266--277",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250696",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Store misses cause significant delays in shared-memory
multiprocessors because of limited store buffering and
ordering constraints required for proper
synchronization. Today, programmers must choose from a
spectrum of memory consistency models that reduce store
stalls at the cost of increased programming complexity.
Prior research suggests that the performance gap among
consistency models can be closed through
speculation---enforcing order only when dynamically
necessary. Unfortunately, past designs either provide
insufficient buffering, replace all stores with
read-modify-write operations, and/or recover from
ordering violations via impractical fine-grained
rollback mechanisms.\par
We propose two mechanisms that, together, enable
store-wait-free implementations of any memory
consistency model. To eliminate buffer-capacity-related
stalls, we propose the scalable store buffer, which
places private/speculative values directly into the L1
cache, thereby eliminating the non-scalable associative
search of conventional store buffers. To eliminate
ordering-related stalls, we propose atomic sequence
ordering, which enforces ordering constraints over
coarse-grain access sequences while relaxing order
among individual accesses. Using cycle-accurate
full-system simulation of scientific and commercial
applications, we demonstrate that these mechanisms
allow the simplified programming of strict ordering
while outperforming conventional implementations on
average by 32\% (sequential consistency), 22\% (SPARC
total store order) and 9\% (SPARC relaxed memory
order).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "memory consistency models; store buffer design",
}
@Article{Ceze:2007:BBE,
author = "Luis Ceze and James Tuck and Pablo Montesinos and
Josep Torrellas",
title = "{BulkSC}: bulk enforcement of sequential consistency",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "278--289",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250697",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "While Sequential Consistency (SC) is the most
intuitive memory consistency model and the one most
programmers likely assume, current multiprocessors do
not support it. Instead, they support more relaxed
models that deliver high performance. SC
implementations are considered either too slow or --
when they can match the performance of relaxed models
-- too difficult to implement.\par
In this paper, we propose Bulk Enforcement of SC
(BulkSC), a novel way of providing SC that is simple to
implement and offers performance comparable to Release
Consistency (RC). The idea is to dynamically group sets
of consecutive instructions into chunks that appear to
execute atomically and in isolation. The hardware
enforces SC at the coarse grain of chunks which, to the
program, appears as providing SC at the individual
memory access level. BulkSC keeps the implementation
simple by largely decoupling memory consistency
enforcement from processor structures. Moreover, it
delivers high performance by enabling full memory
access reordering and overlapping within chunks and
across chunks. We describe a complete system
architecture that supports BulkSC and show that it
delivers performance comparable to RC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bulk; chip multiprocessors; memory consistency models;
programmability; sequential consistency",
}
@Article{Diniz:2007:LPC,
author = "Bruno Diniz and Dorgival Guedes and Wagner {Meira,
Jr.} and Ricardo Bianchini",
title = "Limiting the power consumption of main memory",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "290--301",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250699",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The peak power consumption of hardware components
affects their power supply, packaging, and cooling
requirements. When the peak power consumption is high,
the hardware components or the systems that use them
can become expensive and bulky. Given that components
and systems rarely (if ever) actually require peak
power, it is highly desirable to limit power
consumption to a less-than-peak power budget, based on
which power supply, packaging, and cooling
infrastructures can be more intelligently
provisioned.\par
In this paper, we study dynamic approaches for limiting
the power consumption of main memories. Specifically,
we propose four techniques that limit consumption by
adjusting the power states of the memory devices, as a
function of the load on the memory subsystem. Our
simulations of applications from three benchmarks
demonstrate that our techniques can consistently limit
power to a pre-established budget. Two of the
techniques can limit power with very low performance
degradation. Our results also show that, when using
these superior techniques, limiting power is at least
as effective an energy-conservation approach as
state-of-the-art techniques explicitly designed for
performance-aware energy conservation. These latter
results represent a departure from current energy
management research and practice.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "main memory; performance; power and energy
management",
}
@Article{Mesa-Martinez:2007:PMV,
author = "Francisco Javier Mesa-Martinez and Joseph
Nayfach-Battilana and Jose Renau",
title = "Power model validation through thermal measurements",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "302--311",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250700",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Simulation environments are an indispensable tool in
the design, prototyping, performance evaluation, and
analysis of computer systems. Simulators must be able to
faithfully reflect the behavior of the system being
analyzed. To ensure the accuracy of the simulator, it
must be verified and determined to closely match
empirical data. Modern processors provide enough
performance counters to validate the majority of the
performance models; nevertheless, the information
provided is not enough to validate power and thermal
models.\par
In order to address some of the difficulties associated
with the validation of power and thermal models, this
paper proposes an infrared measurement setup to capture
run-time power consumption and thermal characteristics
of modern chips. We use infrared cameras with high
spatial resolution ($ 10 \times 10 \, \mu $m) and
high frame rate (125fps) to capture thermal maps. To
generate a detailed power breakdown (leakage and
dynamic) for each processor floorplan unit, we employ
genetic algorithms. The genetic algorithm finds a power
equation for each floorplan block that produces the
measured temperature for a given thermal package. The
difference between the predicted power and the
externally measured power consumption for an AMD Athlon
analyzed in this paper has less than 1\% discrepancy.
As an example of applicability, we compare the obtained
measurements with CACTI power models, and propose
extensions to existing thermal models to increase
accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "power and thermal measurements",
}
@Article{Lin:2007:TMM,
author = "Jiang Lin and Hongzhong Zheng and Zhichun Zhu and
Howard David and Zhao Zhang",
title = "Thermal modeling and management of {DRAM} memory
systems",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "312--322",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250701",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With increasing speed and power density,
high-performance memories, including FB-DIMM (Fully
Buffered DIMM) and DDR2 DRAM, now begin to require
dynamic thermal management (DTM) as processors and hard
drives did. The DTM of memories, nevertheless, is
different in that it should take the processor
performance and power consumption into consideration.
Existing schemes have ignored that. In this study, we
investigate a new approach that controls the memory
thermal issues from the source generating memory
activities---the processor. It will smooth the program
execution when compared with shutting down memory
abruptly, and therefore improve the overall system
performance and power efficiency. For multicore
systems, we propose two schemes called adaptive core
gating and coordinated DVFS. The first scheme activates
clock gating on selected processor cores and the second
one scales down the frequency and voltage levels of
processor cores when the memory is to be over-heated.
They can successfully control the memory activities and
handle thermal emergency. More importantly, they
improve performance significantly under the given
thermal envelope. Our simulation results show that
adaptive core gating improves performance by up to
23.3\% (16.3\% on average) on a four-core system with
FB-DIMM when compared with DRAM thermal shutdown; and
coordinated DVFS with control-theoretic methods
improves the performance by up to 18.5\% (8.3\% on
average).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "DRAM memories; thermal management; thermal modeling",
}
@Article{Tiwari:2007:RPA,
author = "Abhishek Tiwari and Smruti R. Sarangi and Josep
Torrellas",
title = "{ReCycle}: pipeline adaptation to tolerate process
variation",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "323--334",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250703",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Process variation affects processor pipelines by
making some stages slower and others faster, therefore
exacerbating pipeline unbalance. This reduces the
frequency attainable by the pipeline. To improve
performance, this paper proposes ReCycle, an
architectural framework that comprehensively applies
cycle time stealing to the pipeline---transferring the
time slack of the faster stages to the slow ones by
skewing clock arrival times to latching elements after
fabrication. As a result, the pipeline can be clocked
with a period equal to the average stage delay rather
than the longest one. In addition, ReCycle's frequency
gains are enhanced with Donor stages, which are empty
stages added to `donate' slack to the slow stages.
Finally, ReCycle can also convert slack into power
reductions.\par
For a 17FO4 pipeline, ReCycle increases the frequency
by 12\% and the application performance by 9\% on
average. Combining ReCycle and donor stages delivers
improvements of 36\% in frequency and 15\% in
performance on average, completely reclaiming the
performance losses due to variation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "clock skew; pipeline; process variation",
}
@Article{Sassone:2007:MSR,
author = "Peter G. Sassone and Jeff {Rupley II} and Edward
Brekelbaum and Gabriel H. Loh and Bryan Black",
title = "Matrix scheduler reloaded",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "335--346",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250704",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "From multiprocessor scale-up to cache sizes to the
number of reorder-buffer entries, microarchitects wish
to reap the benefits of more computing resources while
staying within power and latency bounds. This tension
is quite evident in schedulers, which need to be large
and single-cycle for maximum performance on
out-of-order cores. In this work we present two
straightforward modifications to a matrix scheduler
implementation which greatly strengthen its
scalability. Both are based on the simple observation
that the wakeup and picker matrices are sparse, even at
small sizes; thus small indirection tables can be used
to greatly reduce their width and latency. This
technique can be used to create quicker iso-performance
schedulers (17--58\% reduced critical path) or larger
iso-timing schedulers (7--26\% IPC increase).
Importantly, the power and area requirements of the
additional hardware are likely offset by the greatly
reduced matrix sizes and subsuming the functionality of
the power-hungry allocation CAMs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "matrix; microarchitecture; picker; scheduler; wakeup",
}
@Article{Sethumadhavan:2007:LBE,
author = "Simha Sethumadhavan and Franziska Roesner and Joel S.
Emer and Doug Burger and Stephen W. Keckler",
title = "Late-binding: enabling unordered load-store queues",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "347--357",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250705",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Conventional load/store queues (LSQs) are an
impediment to both power-efficient execution in
superscalar processors and scaling to large-window
designs. In this paper, we propose techniques to
improve the area and power efficiency of LSQs by
allocating entries when instructions issue (`late
binding'), rather than when they are dispatched. This
approach enables lower occupancy and thus smaller LSQs.
Efficient implementations of late-binding LSQs,
however, require the entries in the LSQ to be unordered
with respect to age. In this paper, we show how to
provide full LSQ functionality in an unordered design
with only small additional complexity and negligible
performance losses. We show that late-binding,
unordered LSQs work well for small-window superscalar
processors, but can also be scaled effectively to
large, kilo-window processors by breaking the LSQs into
address-interleaved banks. To handle the increased
overflows, we apply classic network flow control
techniques to the processor micronetworks, enabling
low-overhead recovery mechanisms from bank overflows.
We evaluate three such mechanisms: instruction replay,
skid buffers, and virtual-channel buffering in the
on-chip memory network. We show that for an
80-instruction window, the LSQ can be reduced to 32
entries. For a 1024-instruction window, the unordered,
late-binding LSQ works well with four banks of 48
entries each. By applying a Bloom filter as well, this
design achieves full hardware memory disambiguation for
a 1,024 instruction window while requiring low average
power per load and store access of 8 and 12 CAM
entries, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "late binding; memory disambiguation; network flow
control",
}
@Article{Leverich:2007:CMS,
author = "Jacob Leverich and Hideho Arakida and Alex
Solomatnikov and Amin Firoozshahian and Mark Horowitz
and Christos Kozyrakis",
title = "Comparing memory systems for chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "358--368",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250707",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "There are two basic models for the on-chip memory in
CMP systems: hardware-managed coherent caches and
software-managed streaming memory. This paper performs
a direct comparison of the two models under the same
set of assumptions about technology, area, and
computational capabilities. The goal is to quantify how
and when they differ in terms of performance, energy
consumption, bandwidth requirements, and latency
tolerance for general-purpose CMPs. We demonstrate that
for data-parallel applications, the cache-based and
streaming models perform and scale equally well. For
certain applications with little data reuse, streaming
scales better due to better bandwidth use and
macroscopic software prefetching. However, the
introduction of techniques such as hardware prefetching
and non-allocating stores to the cache-based model
eliminates the streaming advantage. Overall, our
results indicate that there is not sufficient advantage
in building streaming memory systems where all on-chip
memory structures are explicitly managed. On the other
hand, we show that streaming at the programming model
level is particularly beneficial, even with the
cache-based model, as it enhances locality and creates
opportunities for bandwidth optimizations. Moreover, we
observe that stream programming is actually easier with
the cache-based model because the hardware guarantees
correct, best-effort execution even when the programmer
cannot fully regularize an application's code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; coherent caches; locality
optimizations; parallel programming; streaming memory",
}
@Article{Muralimanohar:2007:IDC,
author = "Naveen Muralimanohar and Rajeev Balasubramonian",
title = "Interconnect design considerations for large {NUCA}
caches",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "369--380",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250708",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The ever increasing sizes of on-chip caches and the
growing domination of wire delay necessitate
significant changes to cache hierarchy design
methodologies. Many recent proposals advocate splitting
the cache into a large number of banks and employing a
network-on-chip (NoC) to allow fast access to nearby
banks (referred to as Non-Uniform Cache
Architectures---NUCA). Most studies on NUCA
organizations have assumed a generic NoC and focused on
logical policies for cache block placement, movement,
and search. Since wire/router delay and power are major
limiting factors in modern processors, this work
focuses on interconnect design and its influence on
NUCA performance and power. We extend the widely-used
CACTI cache modeling tool to take network design
parameters into account. With these overheads
appropriately accounted for, the optimal cache
organization is typically very different from that
assumed in prior NUCA studies. To alleviate the
interconnect delay bottleneck, we propose novel cache
access optimizations that introduce heterogeneity
within the inter-bank network. The careful
consideration of interconnect choices for a large cache
results in a 51\% performance improvement over a
baseline generic NoC and the introduction of
heterogeneity within the network yields an additional
11--15\% performance improvement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache models; interconnect; memory hierarchies;
network-on-chip; non-uniform cache architecture",
}
@Article{Qureshi:2007:AIP,
author = "Moinuddin K. Qureshi and Aamer Jaleel and Yale N. Patt
and Simon C. Steely and Joel Emer",
title = "Adaptive insertion policies for high performance
caching",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "381--391",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250709",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The commonly used LRU replacement policy is
susceptible to thrashing for memory-intensive workloads
that have a working set greater than the available
cache size. For such applications, the majority of
lines traverse from the MRU position to the LRU
position without receiving any cache hits, resulting in
inefficient use of cache space. Cache performance can
be improved if some fraction of the working set is
retained in the cache so that at least that fraction of
the working set can contribute to cache hits.\par
We show that simple changes to the insertion policy can
significantly reduce cache misses for memory-intensive
workloads. We propose the LRU Insertion Policy (LIP)
which places the incoming line in the LRU position
instead of the MRU position. LIP protects the cache
from thrashing and results in close to optimal hit rate
for applications that have a cyclic reference pattern.
We also propose the Bimodal Insertion Policy (BIP) as
an enhancement of LIP that adapts to changes in the
working set while maintaining the thrashing protection
of LIP. We finally propose a Dynamic Insertion Policy
(DIP) to choose between BIP and the traditional LRU
policy depending on which policy incurs fewer misses.
The proposed insertion policies do not require any
change to the existing cache structure, are trivial to
implement, and have a storage requirement of less than
two bytes. We show that DIP reduces the average MPKI of
the baseline 1MB 16-way L2 cache by 21\%, bridging
two-thirds of the gap between LRU and OPT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "replacement; set dueling; set sampling; thrashing",
}
@Article{Karger:2007:PSL,
author = "Paul A. Karger",
title = "Performance and security lessons learned from
virtualizing the {Alpha} processor",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "392--401",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250711",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Virtualization has become much more important
throughout the computer industry both to improve
security and to support multiple workloads on the same
hardware with effective isolation between those
workloads. The most widely used chip architecture, the
Intel and AMD x86 processors, have begun to support
virtualization, but the initial implementations show
some limitations. This paper examines the
virtualization properties of the Alpha architecture
with particular emphasis on features that improve
performance and security. It shows how the Alpha's
features of PALcode, address space numbers, software
handling of translation buffer misses, lack of used and
modified bits, and secure handling of unpredictable
results all contribute to making virtualization of the
Alpha particularly easy. The paper then compares the
virtual architecture of the Alpha with Intel's and
AMD's virtualization approaches for x86. It also
comments briefly on Intel's virtualization technology
for Itanium, IBM's zSeries and pSeries hypervisors and
Sun's UltraSPARC virtualization. It particularly
identifies some differences between translation buffers
on x86 and translation buffers on VAX and Alpha that
can have adverse performance consequences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "hypervisors; security; virtual machine monitors;
virtualizability",
}
@Article{Karkhanis:2007:ADA,
author = "Tejas S. Karkhanis and James E. Smith",
title = "Automated design of application specific superscalar
processors: an analytical approach",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "402--411",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250712",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Analytical modeling is applied to the automated design
of application-specific superscalar processors. Using
an analytical method bridges the gap between the size
of the design space and the time required for detailed
cycle-accurate simulations. The proposed design
framework takes as inputs the design targets (upper
bounds on execution time, area, and energy), design
alternatives, and one or more application programs. The
output is the set of out-of-order superscalar
processors that are Pareto-optimal with respect to
performance-energy-area. The core of the new design
framework is made up of analytical performance and
energy activity models, and an analytical model-based
design optimization process.\par
For a set of benchmark programs and a design space of
2000 designs, the design framework arrives at all
performance-energy-area Pareto-optimal design points
within 16 minutes on a 2 GHz Pentium-4. In contrast, it
is estimated that a na{\"\i}ve cycle-accurate
simulation-based exhaustive search would require at
least two months to arrive at the Pareto-optimal design
points for the same design space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "analytical model; application specific processors;
design optimization; energy model; performance model",
}
@Article{Phansalkar:2007:ARA,
author = "Aashish Phansalkar and Ajay Joshi and Lizy K. John",
title = "Analysis of redundancy and application balance in the
{SPEC CPU2006} benchmark suite",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "412--423",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250713",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The recently released SPEC CPU2006 benchmark suite is
expected to be used by computer designers and computer
architecture researchers for pre-silicon early design
analysis. Partial use of benchmark suites by
researchers, due to simulation time constraints,
compiler difficulties, or library or system call issues
is likely to happen; but a random subset can lead to
misleading results. This paper analyzes the SPEC
CPU2006 benchmarks using performance counter based
experimentation from several state of the art systems,
and uses statistical techniques such as principal
component analysis and clustering to draw inferences on
the similarity of the benchmarks and the redundancy in
the suite and arrive at meaningful subsets.\par
The SPEC CPU2006 benchmark suite contains several
programs from areas such as artificial intelligence and
includes none from the electronic design automation
(EDA) application area. Hence there is a concern on the
application balance in the suite. An analysis from the
perspective of fundamental program characteristics
shows that the included programs offer characteristics
broader than the EDA programs' space. A subset of 6
integer programs and 8 floating point programs can
yield most of the information from the entire suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "benchmark; clustering; microprocessor performance
counters; SPEC",
}
@Article{Kim:2007:VPR,
author = "Hyesoon Kim and Jos{\'e} A. Joao and Onur Mutlu and
Chang Joo Lee and Yale N. Patt and Robert Cohn",
title = "{VPC} prediction: reducing the cost of indirect
branches via hardware-based dynamic devirtualization",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "424--435",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250715",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Indirect branches have become increasingly common in
modular programs written in modern object-oriented
languages and virtual machine based runtime systems.
Unfortunately, the prediction accuracy of indirect
branches has not improved as much as that of
conditional branches. Furthermore, previously proposed
indirect branch predictors usually require a
significant amount of extra hardware storage and
complexity, which makes them less attractive to
implement.\par
This paper proposes a new technique for handling
indirect branches, called Virtual Program Counter (VPC)
prediction. The key idea of VPC prediction is to treat
a single indirect branch as multiple virtual
conditional branches in hardware for prediction
purposes. Our technique predicts each of the virtual
conditional branches using the existing conditional
branch prediction hardware. Thus, no separate storage
structure is required for predicting indirect branch
targets.\par
Our evaluation shows that VPC prediction improves
average performance by 26.7\% compared to a
commonly-used branch target buffer based predictor on
12 indirect branch intensive applications. VPC
prediction achieves the performance improvement
provided by at least a 12KB (and usually a 192KB)
tagged target cache predictor on half of the examined
applications. We show that VPC prediction can be used
with any existing conditional branch prediction
mechanism and that the accuracy of VPC prediction
improves when a more accurate conditional branch
predictor is used.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "devirtualization; indirect branch prediction; virtual
functions",
}
@Article{Hilton:2007:GCI,
author = "Andrew D. Hilton and Amir Roth",
title = "{Ginger}: control independence using tag rewriting",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "436--447",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250716",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The negative performance impact of branch
mis-predictions can be reduced by exploiting control
independence (CI). When a branch mis-predicts, the
wrong-path instructions up to the point where control
converges with the correct path are selectively
squashed and replaced with correct-path instructions.
Instructions beyond the convergence-point---the branch's
control-independent (CI) instructions---are spared from
squashing. Exploiting CI requires updating the input
data dependences of CI instructions to reflect the
selective removal and insertion of logically older
instructions and transitively re-dispatching those CI
instructions whose inputs have changed. This capability
is generally called out-of-order renaming. Previously
proposed CI designs use out-of-order renaming schemes
that either consume excessive rename/dispatch
bandwidth, can only be applied in limited cases, or
incur a cost even when the branch would be correctly
predicted.\par
Ginger is a CI design that is both general and
bandwidth efficient. Ginger implements out-of-order
renaming using tag rewriting, re-linking the input
dependences of CI instructions as they sit in the
window. To do this, Ginger halts the pipeline uses the
idle map table read and write ports and the issue queue
match lines and write lines to perform a register-tag
`search-and-replace' operation. After a few cycles, the
pipeline restarts and execution resumes with correct
data dependences. Cycle-level simulation shows that
Ginger out-performs previous CI designs, yielding
geometric mean speedups over an aggressive non-CI
processor of 5\%, 12\%, and 11\%---on SPECint2000,
MediaBench, and Comm-Bench---with speedups of 15\% or
greater on 11 of 46 programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "branch misprediction; control independence;
out-of-order renaming; selective re-dispatch",
}
@Article{Al-Zawawi:2007:TCI,
author = "Ahmed S. Al-Zawawi and Vimal K. Reddy and Eric
Rotenberg and Haitham H. Akkary",
title = "Transparent control independence {(TCI)}",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "448--459",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250717",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Superscalar architectures have been proposed that
exploit control independence, reducing the performance
penalty of branch mispredictions by preserving the work
of future misprediction-independent instructions. The
essential goal of exploiting control independence is to
completely decouple future misprediction-independent
instructions from deferred misprediction-dependent
instructions. Current implementations fall short of
this goal because they explicitly maintain program
order among misprediction-independent and
misprediction-dependent instructions. Explicit
approaches sacrifice design efficiency and ultimately
performance.\par
We observe it is sufficient to emulate program order.
Potential misprediction-dependent instructions are
singled out a priori and their unchanging source values
are checkpointed. These instructions and values are set
aside as a `recovery program'. Checkpointed source
values break the data dependencies with co-mingled
misprediction-independent instructions---now long since
gone from the pipeline---achieving the essential
decoupling objective. When the mispredicted branch
resolves, recovery is achieved by fetching the
self-sufficient, condensed recovery program. Recovery
is effectively transparent to the pipeline, in that
speculative state is not rolled back and recovery
appears as a jump to code. A coarse-grain retirement
substrate permits the relaxed order between the
decoupled programs. Transparent control independence
(TCI) yields a highly streamlined pipeline that quickly
recycles resources based on conventional speculation,
enabling a large window with small cycle-critical
resources, and prevents many mispredictions from
disrupting this large window.\par
TCI achieves speedups as high as 64\% (16\% average)
and 88\% (22\% average) for 4-issue and 8-issue
pipelines, respectively, among 15 SPEC integer
benchmarks. Factors that limit the performance of
explicitly ordered approaches are quantified.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "branch prediction; checkpoints; control independence;
selective re-execution; selective recovery;
speculation",
}
@Article{Wang:2007:EAA,
author = "Nicholas J. Wang and Aqeel Mahesri and Sanjay J.
Patel",
title = "Examining {ACE} analysis reliability estimates using
fault-injection",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "460--469",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1273440.1250719",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "ACE analysis is a technique to provide an early
reliability estimate for microprocessors. ACE analysis
couples data from abstract performance models with low
level design details to identify and rule out transient
faults that will not cause incorrect execution. While
many transient faults are analyzable in ACE analysis
frameworks, some are not. As a result, ACE analysis is
conservative and provides a lower bound for the
reliability of a processor design. Bounding the
reliability of a design is useful since it can
guarantee that the given design will meet reliability
goals.\par
In this work, we quantify and identify the sources of
ACE analysis conservatism by comparing an ACE analysis
methodology against a rigorous fault-injection study.
We evaluate two flavors of ACE analysis: a `simple'
analysis and a refined analysis, finding that even the
refined analysis overestimates the soft error
vulnerability of an instruction scheduler by 2--3x. The
conservatism stems from two key sources: from lack of
detail in abstract performance models and from what we
term Y-Bits, a result of the single-pass simulation
methodology that is typical of ACE analysis. We also
examine the efficacy of applying ACE analysis to a
class of `partial coverage' error mitigation
techniques. In particular, we perform a case study on
one such technique and extrapolate our findings to
others.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "fault tolerance; measurement techniques;
microprocessors; soft errors",
}
@Article{Aggarwal:2007:CIB,
  author          = {Nidhi Aggarwal and Parthasarathy Ranganathan and
                     Norman P. Jouppi and James E. Smith},
  title           = {Configurable isolation: building high availability
                     systems with commodity multi-core processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {2},
  pages           = {470--481},
  month           = may,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1250662.1250720},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:43 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {High availability is an increasingly important
                     requirement for enterprise systems, often valued more
                     than performance. Systems designed for high
                     availability typically use redundant hardware for error
                     detection and continued uptime in the event of a
                     failure. Chip multiprocessors with an abundance of
                     identical resources like cores, cache and
                     interconnection networks would appear to be ideal
                     building blocks for implementing high availability
                     solutions on chip. However, doing so poses significant
                     challenges with respect to error containment and faulty
                     component replacement. Increasing silicon and transient
                     fault rates with future technology scaling exacerbate
                     the problem. This paper proposes a novel,
                     cost-effective, architecture for high availability
                     systems built from future multi-core processors. We
                     propose a new chip multiprocessor architecture that
                     provides configurable isolation for fault containment
                     and component retirement, based upon cost-effective
                     modifications to commodity designs. The design is
                     evaluated for a state-of-the-art industrial fault model
                     and the proposed architecture is shown to provide
                     effective fault isolation and graceful degradation even
                     when the failure rate is high.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {chip multiprocessors; fault isolation; high
                     availability},
}
@Article{Dalton:2007:RFI,
author = "Michael Dalton and Hari Kannan and Christos
Kozyrakis",
title = "{Raksha}: a flexible information flow architecture for
software security",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "482--493",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250722",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "High-level semantic vulnerabilities such as SQL
injection and cross-site scripting have surpassed
buffer overflows as the most prevalent security
exploits. The breadth and diversity of software
vulnerabilities demand new security solutions that
combine the speed and practicality of hardware
approaches with the flexibility and robustness of
software systems.\par
This paper proposes Raksha, an architecture for
software security based on dynamic information flow
tracking (DIFT). Raksha provides three novel features
that allow for a flexible hardware/software approach to
security. First, it supports flexible and programmable
security policies that enable software to direct
hardware analysis towards a wide range of high-level
and low-level attacks. Second, it supports multiple
active security policies that can protect the system
against concurrent attacks. Third, it supports
low-overhead security handlers that allow software to
correct, complement, or extend the hardware-based
analysis without the overhead associated with operating
system traps.\par
We present an FPGA prototype for Raksha that provides a
full featured Linux workstation for security analysis.
Using unmodified binaries for real-world applications,
we demonstrate that Raksha can detect high-level
attacks such as directory traversal, command injection,
SQL injection, and cross-site scripting as well as
low-level attacks such as buffer overflows. We also
show that low overhead exception handling is critical
for analyses such as memory corruption protection in
order to address false positives that occur due to the
diverse code patterns in frequently used software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic information flow tracking; semantic
vulnerabilities; software security",
}
@Article{Wang:2007:NCD,
author = "Zhenghong Wang and Ruby B. Lee",
title = "New cache designs for thwarting software cache-based
side channel attacks",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "494--505",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250723",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software cache-based side channel attacks are a
serious new class of threats for computers. Unlike
physical side channel attacks that mostly target
embedded cryptographic devices, cache-based side
channel attacks can also undermine general purpose
systems. The attacks are easy to perform, effective on
most platforms, and do not require special instruments
or excessive computation power. In recently
demonstrated attacks on software implementations of
ciphers like AES and RSA, the full key can be recovered
by an unprivileged user program performing simple
timing measurements based on cache misses.\par
We first analyze these attacks, identifying cache
interference as the root cause of these attacks. We
identify two basic mitigation approaches: the
partition-based approach eliminates cache interference
whereas the randomization-based approach randomizes
cache interference so that zero information can be
inferred. We present new security-aware cache designs,
the Partition-Locked cache (PLcache) and Random
Permutation cache (RPcache), analyze and prove their
security, and evaluate their performance. Our results
show that our new cache designs with built-in security
can defend against cache-based side channel attacks in
general---rather than only specific attacks on a given
cryptographic algorithm---with very little performance
degradation and hardware cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache; computer architecture; processor; security;
side channel; timing attacks",
}
@Article{Soundararajan:2007:MBV,
author = "Niranjan Kumar Soundararajan and Angshuman Parashar
and Anand Sivasubramaniam",
title = "Mechanisms for bounding vulnerabilities of processor
structures",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "2",
pages = "506--515",
month = may,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1250662.1250725",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:43 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Concern for the increasing susceptibility of processor
structures to transient errors has led to several
recent research efforts that propose architectural
techniques to enhance reliability. However, real
systems are typically required to satisfy hard
reliability budgets, and barring expensive
full-redundancy approaches, none of the proposed
solutions treat any reliability budgets or bounds as
hard constraints. Meeting vulnerability bounds requires
monitoring vulnerabilities of processor structures and
taking appropriate actions whenever these bounds are
violated. This mandates treating reliability as a
first-order microarchitecture design constraint, while
optimizing performance as long as reliability
requirements are satisfied. This paper makes three key
contributions towards this goal: (i) we present a
simple infrastructure to monitor and provide upper
bounds on the vulnerabilities of key processor
structures at cycle-level fidelity; (ii) we propose two
distinct control mechanisms---throttling and selective
redundancy---to proactively and/or reactively bound the
vulnerabilities to any limit specified by the system
designer; (iii) within this framework, we propose a
novel adaptation of Out-of-Order Commit for
vulnerability reduction, which automatically provides
additional leverage for the control mechanisms to boost
performance while remaining within the reliability
budget.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "microarchitecture; redundant threading; transient
faults",
}
@Article{Walcott:2007:DPA,
  author          = {Kristen R. Walcott and Greg Humphreys and Sudhanva
                     Gurumurthi},
  title           = {Dynamic prediction of architectural vulnerability from
                     microarchitectural state},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {2},
  pages           = {516--527},
  month           = may,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1250662.1250726},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:43 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Transient faults due to particle strikes are a key
                     challenge in microprocessor design. Driven by
                     exponentially increasing transistor counts, per-chip
                     faults are a growing burden. To protect against soft
                     errors, redundancy techniques such as redundant
                     multithreading (RMT) are often used. However, these
                     techniques assume that the probability that a
                     structural fault will result in a soft error (i.e., the
                     Architectural Vulnerability Factor (AVF)) is 100
                     percent, unnecessarily draining processor resources.
                     Due to the high cost of redundancy, there have been
                     efforts to throttle RMT at runtime. To date, these
                     methods have not incorporated an AVF model and
                     therefore tend to be ad hoc. Unfortunately, computing
                     the AVF of complex microprocessor structures (e.g., the
                     ISQ) can be quite involved.\par
                     To provide probabilistic guarantees about fault
                     tolerance, we have created a rigorous characterization
                     of AVF behavior that can be easily implemented in
                     hardware. We experimentally demonstrate AVF variability
                     within and across the SPEC2000 benchmarks and identify
                     strong correlations between structural AVF values and a
                     small set of processor metrics. Using these simple
                     indicators as predictors, we create a proof-of-concept
                     RMT implementation that demonstrates that AVF
                     prediction can be used to maintain a low fault
                     tolerance level without significant performance
                     impact.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {architecture vulnerability factor; microarchitecture;
                     performance; redundant multithreading; reliability},
}
@Article{Aggarwal:2007:ISI,
author = "Aneesh Aggarwal and Pradip Bose and Mohamed Zahran",
title = "Introduction to the special issue on the {2006
Reconfigurable and Adaptive Architecture Workshop}",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "3",
pages = "1--1",
month = jun,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1294313.1294317",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:27 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The papers that follow comprise the proceedings of the
first Reconfigurable and Adaptive Architecture Workshop
(RAAW 2006) that was held in conjunction with the
39th International Conference on Microarchitecture
in Orlando, Florida.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bellas:2007:MSA,
  author          = {Nikolaos Bellas and Sek M. Chai and Malcolm Dwyer and
                     Dan Linzmeier},
  title           = {Mapping streaming architectures on reconfigurable
                     platforms},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {2--8},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294318},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Hardware accelerators, used as application-specific
                     extensions to the computational capabilities of a
                     system, are efficient mechanisms to enhance the
                     performance and reduce the power dissipation in a
                     System On Chip (SoC). These accelerators execute on the
                     computationally critical part of the application, and
                     offload computations from the scalar processors. In
                     this paper, we present a design automation tool that
                     generates accelerators based on a given application
                     kernel. The accelerators are processing streaming data,
                     and support a programming model which can naturally
                     express a large number of embedded applications, and
                     which results in efficient and fast hardware
                     implementations. We demonstrate the applicability of
                     the tool for architectural space exploration for a
                     number of media applications, with results on area,
                     throughput, and clock speeds.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Labrecque:2007:CCG,
  author          = {Martin Labrecque and Peter Yiannacouras and J. Gregory
                     Steffan},
  title           = {Custom code generation for soft processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {9--19},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294319},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Embedded systems designers that use FPGAs are
                     increasingly including soft processors in their designs
                     (configurable processors built in the programmable
                     logic of the FPGA). While there has been a significant
                     amount of research on adding custom instructions and
                     accelerators to soft processors, these are typically
                     used to extend an unmodified base ISA targeted by
                     generic compilation such as with unmodified gcc. In
                     this paper we explore several opportunities for the
                     compiler to optimize the code generated for soft
                     processors through application-specific customization
                     of the base ISA---techniques that are orthogonal to
                     adding custom instructions. In particular we explore:
                     (i) low level software-hardware trade-offs between
                     basic instructions; (ii) the utility of ISA-specific
                     features---in particular for the delay slots and Hi/Lo
                     registers in the MIPS ISA; and (iii) application
                     specific register management. We find that through
                     these techniques that have no hardware cost we can
                     improve the area efficiency of soft processors by 12\%
                     on average across a suite of benchmarks, and by up to
                     47\% in the best case.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Suri:2007:IIL,
  author          = {Tameesh Suri},
  title           = {Improving instruction level parallelism through
                     reconfigurable units in superscalar processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {20--27},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294320},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {With reducing feature sizes, more transistors can be
                     integrated on the chip. The increased transistor budget
                     can be utilized to improve the instruction level
                     parallelism (ILP) exploited from the processor.
                     However, the transistors cannot be used to arbitrarily
                     increase the processor width and size in the hope of
                     exploiting better ILP. In this paper, we propose an
                     architecture where the superscalar datapath is tightly
                     coupled with a reconfigurable unit (RFU). The
                     reconfiguration unit is configured to execute the
                     traces of dynamic instructions that are frequently
                     executed. To address the data dependency issues between
                     the instructions in the superscalar and the RFU, we
                     propose to execute the trace on the RFU with predicted
                     values. When the trace instructions reach the issue
                     queue in the superscalar, the predictions are
                     validated. In this technique, performance improvement
                     is obtained for correct prediction, whereas no
                     performance degradation is incurred for mispredictions.
                     With this architecture, we observe an average
                     instructions per cycle (IPC) improvement of about 11\%
                     over the simulated SPEC 2000 benchmarks, using a very
                     small last value data value predictor.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Najaf-abadi:2007:ACE,
  author          = {Hashem H. Najaf-abadi and Eric Rotenberg},
  title           = {Architectural {\em contesting\/}: exposing and
                     exploiting temperamental behavior},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {28--35},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294321},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Previous studies have proposed techniques to
                     dynamically change the architecture of a processor to
                     better suit the characteristics of the workload at
                     hand. However, all such approaches are prone to a
                     fundamental trade-off between the architectural
                     diversity they can provide and the latency of
                     architectural change, their fixed-configuration
                     performance and the complexity of finding the best
                     architectural configuration for the workload at hand.
                     In this study we argue that the full potential of
                     dynamic architectural customization can only be
                     achieved by diminishing the effect of the degree of
                     available architectural diversity on the aforementioned
                     performance factors.\par
                     The performance of a statically designed processing
                     core in a heterogeneous multi-core system is
                     independent of the architectural diversity available.
                     In addition, it is apparent that concurrent execution
                     of code on differently architected cores automatically
                     reveals which architecture is more suitable for the
                     characteristics of a particular workload.\par
                     We therefore propose architectural contesting; the
                     redundant execution of code on a number of differently
                     architected processors (each customized for a different
                     set of workload characteristics) in a leader follower
                     arrangement, such that the leader and follower cores
                     continuously shift roles as one core or the other
                     becomes more favorable for new code phases. In this
                     manner effective execution is naturally transferred
                     from one static architecture to the other with little
                     latency.\par
                     In this study, we show that the contesting of only
                     processor width can yield an average speedup of 7.5\%
                     and up to 12.5\% in integer SPEC benchmarks.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tseng:2007:DHS,
  author          = {Kuo-Kun Tseng and Ying-Dar Lin and Tsern-Huei Lee and
                     Yuan-Cheng Lai},
  title           = {Deterministic high-speed root-hashing automaton
                     matching coprocessor for embedded network processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {36--43},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294314},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {While string matching plays an important role in deep
                     packet inspection applications, its software algorithms
                     are insufficient to meet the demands of high-speed
                     performance. Accordingly, we were motivated to propose
                     fast and deterministic performance root-hashing
                     automaton matching (RHAM) coprocessor for embedded
                     network processor. Although automaton algorithms are
                     robust with deterministic matching time, there is still
                     plenty of room for improvement of their average-case
                     performance. The proposed RHAM employs novel
                     root-hashing technique to accelerate automaton
                     matching. In our experiment, RHAM is implemented in a
                     prevalent automaton algorithm, Aho--Corasick (AC) which
                     is often used in many packet inspection applications.
                     Compared to the original AC, RHAM only requires extra
                     vector size in 48 Kbytes for root-hashing, and has
                     about 900\% and 420\% outperformance for 20,000 URLs
                     and 10,000 virus patterns respectively. Implementation
                     of RHAM FPGA can perform at the rate of 12.6 Gbps with
                     the pattern amount in 34,215 bytes. This is superior to
                     all previous matching hardware in terms of throughput
                     and pattern set.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {coprocessor; finite automaton; hashing; packet
                     inspection; string matching},
}
@Article{Sibai:2007:PAW,
author = "Fadi N. Sibai",
title = "Performance analysis and workload characterization of
the {3DMark05} benchmark on modern parallel computer
platforms",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "3",
pages = "44--52",
month = jun,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1294313.1294315",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:48:27 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With ever increasing CPU and graphics card speeds, and
improved sophistication, stunning visual effects, and
growing scene detail and real life-like content of 3D
games, 3DMark{\reg} emerged as the leading PC benchmark
for 3D gaming performance with several millions of
worldwide downloads. Its tests are at the cutting edge
of consumer graphics and push the limit of 3D rendering
with spectacular scenes, and state of the art lighting
techniques. The benchmark scores help quickly
differentiate the platforms with state of the art
graphic cards and processors from those with older
components. In this paper, we analyze the scaling of
the 3DMark{\reg}05 benchmark with CPU frequency, number
of CPUs, number of GPUs, and number of threads
supported by the hardware. We also characterize the
benchmark's workload. These results reveal that the
benchmark scales well indicating that 3D games if
implemented with multiple Physics and Artificial
Intelligence or other relevant content threads should
show good scaling too on multi-CPU and multi-GPU
platforms. The characterization results reveal the
close dependence of 3D graphics applications on the
memory subsystem's performance as 1 out of 2
instructions is a load or store instruction. The
results also revealed that there is a direct
correlation with the Game Tests' performance and the
number of cache memory read misses per instruction
retired, the number of stores retired per instruction
retired, the number of polygons per Draw*Primitive; and
the number of set-vertex shader calls per frame. All
these events relate to the memory subsystem performance
generally linking the 3D graphics applications'
performance and the 3DMark{\reg} overall score to the
platform's memory performance. Salient
microarchitectural performance events of the CPU tests
were also memory-related.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "3D graphics performance; multiple CPU and GPU core
platforms; workload characterization",
}
@Article{Thorson:2007:INb,
  author          = {Mark Thorson},
  title           = {{Internet} nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {35},
  number          = {3},
  pages           = {53--55},
  month           = jun,
  year            = {2007},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1294313.1294323},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jun 17 11:48:27 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This column consists of selected traffic from the
                     comp.arch newsgroup, a forum for discussion of computer
                     architecture on the Internet---an international
                     computer network.\par
                     As always, the opinions expressed in this column are
                     the personal views of the authors, and do not
                     necessarily represent the institutions to which they
                     are affiliated.\par
                     Text which sets the context of a message appears
                     underlined or in italics; this is usually text the
                     author has quoted from earlier messages. The code-like
                     expressions below the authors' names are their
                     addresses on Internet.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bartolini:2007:MPD,
author = "S. Bartolini and P. Foglia and C. A. Prete",
title = "{MEmory} performance: {DEaling} with applications,
systems and architecture",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "4--5",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327314",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this issue, we present the papers from MEDEA-2006
Workshop [3] held in conjunction with the IEEE-ACM
International Conference on Parallel Architectures and
Compilation Techniques (PACT-2006) [1,2].",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "Medea 2006 workshop.",
}
@Article{Lorton:2007:ABL,
author = "K. Patrick Lorton and David S. Wise",
title = "Analyzing block locality in {Morton}-order and
{Morton}-hybrid matrices",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "6--12",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327315",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As the architectures of computers change, introducing
more caches onto multicore chips, even more locality
becomes necessary. With the bandwidth between caches
and RAM now even more valuable, additional locality
from new matrix representations will be important to
keep multiple processors busy. The default storage
representations of both C and Fortran, row- and
column-major respectively, have fundamental
deficiencies with many matrix computations. By
switching the storage representation from Cartesian to
block indices, one is able to take better advantage of
cache locality at all levels from L1 to paging. This
paper only changes storage representation from
row-major to Morton-hybrid, and applies it to matrix
multiplication. Its purpose is to show that, even with
only traditional iterative algorithms, simply changing
storage representation offers significant speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Cholesky factorization; Morton order; quadtrees",
remark = "Medea 2006 workshop.",
}
@Article{Deris:2007:ICE,
author = "Kaveh Jokar Deris and Amirali Baniasadi",
title = "Investigating cache energy and latency break-even
points in high performance processors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "13--20",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327316",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this work we study how cache complexity impacts
energy and performance in high performance processors.
Moreover, we estimate cache energy budget for two high
performance processors. We calculate energy and latency
break-even points for realistic and ideal cache
organizations for different applications. We show that
design efforts made to reduce cache miss rate are only
justifiable from the energy and performance point of
view only if the associated latency and energy overhead
remain below the calculated break-even
points.\par
Furthermore, we show that, for the processors and
applications studied here, the instruction cache has a
lower latency break-even point compared to the data
cache. However, investing energy in the data cache is
likely to result in better energy efficiency compared
to the instruction cache.\par
We also study alternative cache configurations for
different processors and investigate if such
alternatives would improve energy efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "Medea 2006 workshop.",
}
@Article{Yan:2007:EIC,
author = "Jun Yan and Wei Zhang",
title = "Evaluating instruction cache vulnerability to
transient errors",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "21--28",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327317",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent research shows that microprocessors are
increasingly susceptible to transient errors. In order
to protect microprocessors cost-effectively, the first
step is to accurately understand the impact of
transient errors on the system reliability. While many
research efforts have been focused on studying the
vulnerability of data caches and other on-chip hardware
components, instruction caches have received less
attention. However, instructions are read every cycle,
any undetected or uncorrected soft errors in
instructions can lead to erroneous computation, wrong
control flow or system crash.\par
This paper studies the instruction cache vulnerability
by considering both the raw SRAM rate and the cache
vulnerability factor. Based on the concept of cache
vulnerability factor, we also investigate the impact of
different cache configuration parameters on the
reliability of instruction caches. We find that on
average 67.5\% of instruction cache soft errors can be
masked by the I-cache itself without impacting other
system components. While quantifying the instruction
cache vulnerability itself does not solve the
reliability problem of instruction cache against
transient errors, we believe this work can provide
useful insights for designers to develop cost-effective
solutions to protect I-caches and to optimally balance
the reliability of instruction caches with other system
goals, such as cost, performance and energy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "Medea 2006 workshop.",
}
@Article{Ramirez:2007:EST,
author = "Tanaus{\'u} Ram{\'\i}rez and Alex Pajuelo and Oliverio
J. Santana and Mateo Valero",
title = "Energy saving through a simple load control
mechanism",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "29--36",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327318",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To alleviate the memory wall problem, current
architectural trends suggest implementing large
instruction windows able to maintain a high number of
in-flight instructions. However, the benefits achieved
by these recent proposals may be limited because more
instructions are executed down the wrong path of a
mispredicted branch. The larger number of misspeculated
instructions involves increasing the energy consumed
compared to traditional designs with smaller
instruction windows. Our analysis shows that, for some
SPEC2000 integer benchmarks, up to 2.5X wrong-path
load instructions are executed when the instruction
window of a 4-way superscalar processor is increased
from 256 to 1024 entries.\par
This paper describes a simple speculative control
technique to prevent wrong-path load instructions from
being executed. Our technique extends the functionality
of the load-store queue to block those load
instructions that depend on a hard-to-predict
conditional branch until it is resolved. If the branch
is actually mispredicted, unnecessary cache misses can
be avoided, saving energy down the wrong path.
Furthermore, instructions that depend on a blocked load
are not issued because their source values are not
available, which also saves dynamic energy. Our results
show that the proposed mechanism reduces, on average,
up to 26\% misspeculated load instructions and 18\%
wrong-path instructions without any performance loss.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "branch prediction; confidence estimation; energy
saving; kilo-instruction processors",
remark = "Medea 2006 workshop.",
}
@Article{Ramos:2007:DPC,
author = "Luis M. Ramos and Jos{\'e} Luis Briz and Pablo E.
Ib{\'a}{\~n}ez and Victor Vi{\~n}als",
title = "Data prefetching in a cache hierarchy with high
bandwidth and capacity",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "37--44",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327319",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper we evaluate four hardware data
prefetchers in the context of a high-performance
three-level on chip cache hierarchy with high bandwidth
and capacity. We consider two classic prefetchers
(Sequential Tagged and Stride) and two correlating
prefetchers: PC/DC, a recent method with a superior
score and low-sized tables, and P-DFCM, a new method.
Like PC/DC, P-DFCM focuses on local delta sequences,
but it is based on the DFCM value predictor. We explore
different prefetch degrees and distances. Running
SPEC2000, Olden and IAbench applications, results show
that this kind of cache hierarchy turns prefetching
aggressiveness into success for the four prefetchers.
Sequential Tagged is the best, and deserves further
attention to cut its losses in some applications. PC/DC
results are matched or even improved by P-DFCM, using
far fewer accesses to tables while keeping sizes low.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "hardware data prefetching",
remark = "Medea 2006 workshop.",
}
@Article{Dybdahl:2007:LBR,
author = "Haakon Dybdahl and Per Stenstr{\"o}m and Lasse
Natvig",
title = "An {LRU}-based replacement algorithm augmented with
frequency of access in shared chip-multiprocessor
caches",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "45--52",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327320",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper proposes a new replacement algorithm to
protect cache lines with potential future reuse from
being evicted. In contrast to the recency based
approaches used in the past (LRU for example), our
algorithm also uses the notion of frequency of access.
Instead of evicting the least recently used block, our
algorithm identifies among a set of LRU blocks the one
that is also least-frequently-used (according to a
heuristic) and chooses that as a victim. We have
implemented this replacement algorithm in a detailed
simulation model of a chip multiprocessor system driven
by SPEC2000 benchmarks. We have found that the new
scheme improves performance for memory intensive
applications. Moreover, as compared to other attempts,
our replacement algorithm provides robust improvements
across all benchmarks. We have also extended an earlier
scheme proposed by Wong and Baer so it is switched off
when performance is not improved. Our results show that
this makes the scheme much more suitable for CMP
configurations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "Medea 2006 workshop.",
}
@Article{Bardine:2007:IPE,
author = "A. Bardine and P. Foglia and G. Gabrielli and C. A.
Prete and P. Stenstr{\"o}m",
title = "Improving power efficiency of {D-NUCA} caches",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "53--58",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327321",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "D-NUCA caches are cache memories that, thanks to
banked organization, broadcast search and
promotion/demotion mechanism, are able to tolerate the
increasing wire delay effects introduced by technology
scaling. As a consequence, they will outperform
conventional caches (UCA, Uniform Cache Architectures)
in future generation cores.\par
Due to the promotion/demotion mechanism, we have found
that, in a D-NUCA cache, the distribution of hits on
the ways varies across applications as well as across
different execution phases within a single application.
In this paper, we show how such a behavior can be
utilized to improve D-NUCA power efficiency as well as
to decrease its access latencies. In particular, we
propose a new D-NUCA structure, called Way Adaptable
D-NUCA cache, in which the number of active (i.e.
powered-on) ways is dynamically adapted to the need of
the running application. Our initial evaluation shows
that a consistent reduction of both the average number
of active ways (42\% in average) and the number of bank
access requests (29\% in average) is achieved, without
significantly affecting the IPC.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "Medea 2006 workshop.",
}
@Article{Thorson:2007:INc,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "4",
pages = "59--62",
month = sep,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1327312.1327323",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:50:54 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This column consists of selected traffic from the
comp.arch newsgroup, a forum for discussion of
computer architecture on the Internet---an
international computer network.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kise:2007:SIA,
author = "Kenji Kise and Toshinori Sato and Hironori Nakajo",
title = "Special issue: {ALPS'07 -- Advanced Low Power
Systems}: Introduction",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "1--2",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360469",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this issue, we present the papers from the
proceedings of the 2nd International Workshop on
Advanced Low Power Systems (ALPS 2007) that was held in
conjunction with the 21st International Conference on
Supercomputing in Seattle.\par
'Thoughtfulness' is an important keyword in the both
current and future technologies in all over the world:
Thoughtful to human being, thoughtful to our
surroundings, thoughtful to the earth, and so on. For
the thoughtfulness, Low-power is believed to be one of
the most indispensable keyword. The ALPS workshop
focuses on the current technological challenges in
developing low-power and power-aware computing systems
ranging from servers to embedded devices. The goal of
the workshop is to bring all aspects of power-aware
computing from industry and academia.\par
This year, we have one invited talk entitled 'An Under
2W 100GOPS Video Recognition Processor Based on a
Linear Array of 128 4-Way VLIW Processing Elements' by
Shorin Kyo (NEC Corporation) and 6 papers selected
based on the full paper review by the program committee
members.\par
The first set of papers discusses low-power designs. We
have three papers: 'Optimal Pipeline Depth with
Pipeline Stage Unification Adoption' by Jun Yao, Hajime
Shimada, Shinobu Miwa, and Shinji Tomita, 'VCLEARIT: A
VLSI CMOS Circuit Leakage Reduction Technique For
Nanoscale Technologies' by Preetham Lakshmikanthan and
Adrian Nunez, and 'Leakage Energy Reduction in Cache
Memory by Data Compression' by Kiyofumi Tanaka and
Takahiro Kawahara.\par
The second set of papers: 'Preventing Timing Errors on
Register Writes: Mechanisms of Detections and
Recoveries' by Hidetsugu Irie, Ken Sugimoto, Masahiro
Goshima, and Shuichi Sakai, 'Not Multi-, but Many-Core:
Designing Integral Parallel Architectures for Embedded
Computation' by Mihaela Malita, Gheorghe Stefan, and
Dominique Thi{\'e}baut, and 'Fine-grain Compensation
Method with Consideration of Trade-offs between
Computation and Data Transfer for Power Consumption' by
Takefumi Miyoshi and Nobuhiko Sugino, covers
reliability, many-core and parallelization
issues.\par
All papers here are going to create the way to the new
aspects of low-power systems. We hope you will find the
papers of this special issue of Computer Architecture
News to be stimulating and that you will be inspired to
contribute your efforts to the future low power
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yao:2007:OPD,
author = "Jun Yao and Shinobu Miwa and Hajime Shimada and Shinji
Tomita",
title = "Optimal pipeline depth with pipeline stage unification
adoption",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "3--9",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360470",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To find the optimal pipeline design point by
considering both performance and power objectives has
been one focus of interest in recent researches.
However, we found that previous papers did not consider
deepening or shrinking pipeline depth dynamically
during the program execution. In this paper, with the
adoption of the earlier proposed Pipeline Stage
Unification (PSU) method, we studied the relationship
between power/performance and pipeline depth in
processors with a pipeline of multi-usable depths. Our
evaluation results of SPECint2000 benchmarks shown in
this paper illustrate that the PSU adoption can achieve
good efficiency for platforms which concern both energy
and performance, even after the utilization of complex
clock gating.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "pipeline design point; pipeline stage unification;
power/performance",
}
@Article{Lakshmikanthan:2007:VVC,
author = "Preetham Lakshmikanthan and Adrian Nu{\~n}ez",
title = "{VCLEARIT}: a {VLSI CMOS} circuit leakage reduction
technique for nanoscale technologies",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "10--16",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360471",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Leakage power loss is a major concern in
deep-submicron technologies as it drains the battery
even when a circuit is completely idle. In this paper,
we first present a novel leakage reduction technique
and then compare and contrast it with other well
established leakage reduction techniques. Our leakage
reduction technique achieves cancellation of leakage
effects in both the pull-up network (PUN) as well as
the pull-down network (PDN) for CMOS circuits. It
involves voltage balancing in the PUN and PDN paths
using a combination of high-$V_T$ (high voltage
threshold) and standard-$V_T$ sleep transistors.
Experiments conducted on a variety of multi-level
combinational MCNC'91 benchmarks show significant
savings in leakage power (up to 3 orders of magnitude),
with lesser area and delay penalty using our leakage
reduction technique when compared to other
techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanaka:2007:LER,
author = "Kiyofumi Tanaka and Takahiro Kawahara",
title = "Leakage energy reduction in cache memory by data
compression",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "17--24",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360472",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Cache memory is effective in bridging a growing speed
gap between a processor and relatively slow external
main memory. Almost all of today's commercial
processors, not only high-performance microprocessors
but embedded ones, have on-chip cache memories.
However, energy consumption in the cache memory would
approach or exceed 50\% of the total consumption by the
processors, which leads to a serious problem in terms
of allowable temperature and performance improvement.
An important point to note is that, in the near future,
static (leakage) energy will dominate the energy
consumption in deep sub-micron processes. In this
paper, we propose cache memory architecture that
exploits gated-Vdd control per cache block and a
dynamic data compression scheme in the secondary cache,
and achieves efficient reduction of static energy
consumed by the secondary cache memory. In the
simulation using SPEC95 integer benchmarks, our
technique reduced about 45\% of leakage energy in the
cache at maximum, and about 28\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache memory; data compression; gated-Vdd; leakage
energy",
}
@Article{Irie:2007:PTE,
author = "Hidetsugu Irie and Ken Sugimoto and Masahiro Goshima
and Shuichi Sakai",
title = "Preventing timing errors on register writes:
mechanisms of detections and recoveries",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "25--31",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360473",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To deal with the increasing variations of the
intra-chip transistors, one promising approach is to
dynamically detect and recover the timing-errors with
microarchitecture. This will induce dependability and
efficiency into microprocessors because it allows VLSI
to operate at the optimum frequency and voltage while
ensuring accuracy.\par
A few approaches for dynamically detecting
timing-errors have been proposed, but none of them have
focused on register writes. In this paper, we propose a
technique for detecting and recovering from timing
errors during register writes. We introduce a verifying
technique that uses additional buffer (called the write
assurance buffer (WAB)) which is provided with a
sufficient timing margin. The evaluation results reveal
a performance degradation of 4.5\% using an 8-entry
WAB; this value becomes negligible when a 16-entry WAB
is used.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malita:2007:MMC,
author = "Mihaela Mali{\c{t}}a and Gheorghe {\c{S}}tefan and
Dominique Thi{\'e}baut",
title = "Not multi-, but many-core: designing integral parallel
architectures for embedded computation",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "32--38",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360474",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent embedded systems have switched to fully
programmable parallel architectures. To make sure all
corner cases usually present in real applications are
supported and efficiently implemented in this switch of
implementation, new solutions must be found. We
introduce the integral parallel architecture (IPA) as a
solution supporting intensive data computation in
System-on-a-chip (SoC) implementations, fitting in a
small area, and requiring low power. An IPA supports
naturally all three possible styles of parallelism:
data, time, and speculative.\par
As an illustrative example, we present the BA1024 chip,
a fully programmable SoC designed by BrightScale, Inc.
for HDTV codecs. Its main performance figures include
60 GOPS/Watt and 2 GOPS/mm$^2$, representing an
efficient IPA approach for embedded computation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "embedded systems; parallel architectures; programmable
systems; video processing",
}
@Article{Miyoshi:2007:FGC,
author = "Takefumi Miyoshi and Nobuhiko Sugino",
title = "Fine-grain compensation method with consideration of
trade-offs between computation and data transfer for
power consumption",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "39--44",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360475",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Fine-grain parallelizing method with consideration of
the number of data transfers for low power consumption
is proposed. In the proposed method, power consumption
by data transfers between processor elements in a
multiprocessor is focused on, and the number of data
transfers is reduced.\par
In this paper, a measure based on the relationship
between variables in a given program is defined to
evaluate the number of data transfers, firstly. And
then a proposed compensation method by use of the
evaluation of power consumption based on the measure is
explained. Finally, the result of applying proposed
compensation method implemented on COINS framework to
several example programs is shown.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Romanescu:2007:VSC,
author = "Bogdan F. Romanescu and Michael E. Bauer and Sule Ozev
and Daniel J. Sorin",
title = "{VariaSim}: simulating circuits and systems in the
presence of process variability",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "45--48",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360465",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we present VariaSim, the publicly
available Static Statistical Timing Analysis (SSTA)
Tool from Duke University. VariaSim enables researchers
to analyze the impact of CMOS process variability on
the behavior of circuits and systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Venkateswaran:2007:FGSa,
author = "N. Venkateswaran and Deepak Srinivasan and Madhavan
Manivannan and T. P. Ramnath Sai Sagar and Shyamsundar
Gopalakrishnan and VinothKrishnan Elangovan and Karthik
Chandrasekar and Prem Kumar Ramesh and Viswanath
Venkatesan and Arvindakshan Babu and Sudharshan",
title = "Future generation supercomputers {I}: a paradigm for
node architecture",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "49--60",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360466",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As a result of the increasing requirements of present
and future computation intensive applications, there
have been many fundamentally divergent approaches such
as the Blue-Gene, TRIPS, HERO, Cascade spurred in order
to provide increased performance at node level in
supercomputing clusters. The design of the node
architecture should be such that 'Cost-Effective
Supercomputing' is realized without compromising on the
requirements of the ever-performance hungry grand
challenge applications. However, to increase
performance at the cluster level, scalability and
likewise tackling the mapping complexity across the
large cluster of nodes becomes critical. The potential
of such a node architecture can be fully exploited only
with an appropriate cluster architecture. In an attempt
to address these issues for efficient and
Cost-Effective Supercomputing, we propose a novel
paradigm for designing High Performance Clusters, in
two papers. In paper-II, we discuss the design of
operating system and cluster architecture. In this
paper, we present a node architecture model based on
the Memory In Processor paradigm and discuss the
related architectural aspects (ISA, compiler, network
interconnection etc). We provide a design space based
on the proposed model for which a simulator is
developed, with the help of which the performance of
such a node architecture is outlined.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Venkateswaran:2007:FGSb,
author = "N. Venkateswaran and Deepak Srinivasan and Madhavan
Manivannan and T. P. Ramnath Sai Sagar and Shyamsundar
Gopalakrishnan and VinothKrishnan Elangovan and Arvind
M. and Prem Kumar Ramesh and Karthik Ganesan and
Viswanath Krishnamurthy and Sivaramakrishnan",
title = "Future generation supercomputers {II}: a paradigm for
cluster architecture",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "61--70",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360467",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In part-I, a novel multi-core node architecture was
proposed which when employed in a cluster environment
would be capable of tackling computational complexity
associated with wide class of applications.
Furthermore, it was discussed that by appropriately
scaling the architectural specifications, Teraops
computing power could be achieved at the node level. In
order to harness the computational power of such a
node, we have developed an efficient application
execution model with a competent cluster architectural
backbone. In this paper we present the novel cluster
paradigm, dealing with operating system design,
parallel programming model and cluster interconnection
network. Our approach in developing the competent
cluster design revolves around an execution model to
aid the execution of multiple applications
simultaneously on all partitions of the cluster,
leading to cost sharing across applications. This would
be a major initiative towards achieving Cost-Effective
Supercomputing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2007:INd,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "35",
number = "5",
pages = "71--73",
month = dec,
year = "2007",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1360464.1360477",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:13 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This column consists of selected traffic from the
comp.arch newsgroup, a forum for discussion of computer
architecture on the Internet---an international
computer network.\par
As always, the opinions expressed in this column are
the personal views of the authors, and do not
necessarily represent the institutions to which they
are affiliated.\par
Text which sets the context of a message appears
underlined or in italics; this is usually text the
author has quoted from earlier messages. The code-like
expressions below the authors' names are their
addresses on Internet.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Winfree:2008:TMP,
author = "Erik Winfree",
title = "Toward molecular programming with {DNA}",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "1--1",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346282",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Biological organisms are beautiful examples of
programming. The program and data are stored in
biological molecules such as DNA, RNA, and proteins;
the algorithms are carried out by molecular and
biochemical processes; and the end result is the
creation and function of an organism. If we understood
how to program molecular systems, what could we create?
Lifelike technologies whose basic operations are
chemical reactions? The fields of chemistry, physics,
biology, and computer science are converging as we
begin to synthesize molecules, molecular machines, and
molecular systems of ever increasing complexity,
leading to subdisciplines such as DNA nanotechnology,
DNA computing, and synthetic biology. Having
demonstrated simple devices and
systems---self-assembled structures, molecular motors,
chemical logic gates---researchers are now turning to the
question of how to create large-scale integrated
systems. To do so, we must learn how to manage
complexity: how to efficiently specify the structure
and behavior of intricate molecular systems, how to
compile such specifications down to the design of
molecules to be synthesized in the lab, and how to
ensure that such systems function robustly. These
issues will be illustrated for chemical logic circuits
based on cascades of DNA hybridization reactions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "DNA; molecular programming",
}
@Article{Chen:2008:OVB,
author = "Xiaoxin Chen and Tal Garfinkel and E. Christopher
Lewis and Pratap Subrahmanyam and Carl A. Waldspurger
and Dan Boneh and Jeffrey Dwoskin and Dan R. K. Ports",
title = "{Overshadow}: a virtualization-based approach to
retrofitting protection in commodity operating
systems",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "2--13",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346284",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Commodity operating systems entrusted with securing
sensitive data are remarkably large and complex, and
consequently, frequently prone to compromise. To
address this limitation, we introduce a
virtual-machine-based system called Overshadow that
protects the privacy and integrity of application data,
even in the event of a total OS compromise. Overshadow
presents an application with a normal view of its
resources, but the OS with an encrypted view. This
allows the operating system to carry out the complex
task of managing an application's resources, without
allowing it to read or modify them. Thus, Overshadow
offers a last line of defense for application
data.\par
Overshadow builds on multi-shadowing, a novel mechanism
that presents different views of `physical' memory,
depending on the context performing the access. This
primitive offers an additional dimension of protection
beyond the hierarchical protection domains implemented
by traditional operating systems and processor
architectures.\par
We present the design and implementation of Overshadow
and show how its new protection semantics can be
integrated with existing systems. Our design has been
fully implemented and used to protect a wide range of
unmodified legacy applications running on an unmodified
Linux operating system. We evaluate the performance of
our implementation, demonstrating that this approach is
practical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cloaking; hypervisors; memory protection;
multi-shadowing; operating systems; virtual machine
monitors; VMM",
}
@Article{McCune:2008:HLC,
author = "Jonathan M. McCune and Bryan Parno and Adrian Perrig
and Michael K. Reiter and Arvind Seshadri",
title = "How low can you go?: recommendations for
hardware-supported minimal {TCB} code execution",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "14--25",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346285",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We explore the extent to which newly available
CPU-based security technology can reduce the Trusted
Computing Base (TCB) for security-sensitive
applications. We find that although this new technology
represents a step in the right direction, significant
performance issues remain. We offer several suggestions
that leverage existing processor technology, retain
security, and improve performance. Implementing these
recommendations will finally allow application
developers to focus exclusively on the security of
their own code, enabling it to execute in isolation
from the numerous vulnerabilities in the underlying
layers of legacy code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "late launch; secure execution; trusted computing",
}
@Article{Bhargava:2008:ATD,
author = "Ravi Bhargava and Benjamin Serebrin and Francesco
Spadini and Srilatha Manne",
title = "Accelerating two-dimensional page walks for
virtualized systems",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "26--35",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346286",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Nested paging is a hardware solution for alleviating
the software memory management overhead imposed by
system virtualization. Nested paging complements
existing page walk hardware to form a two-dimensional
(2D) page walk, which reduces the need for hypervisor
intervention in guest page table management. However,
the extra dimension also increases the maximum number
of architecturally-required page table
references.\par
This paper presents an in-depth examination of the 2D
page table walk overhead and options for decreasing it.
These options include using the AMD Opteron processor's
page walk cache to exploit the strong reuse of page
entry references. For a mix of server and SPEC
benchmarks, the presented results show a 15\%--38\%
improvement in guest performance by extending the
existing page walk cache to also store the nested
dimension of the 2D page walk. Caching nested page
table translations and skipping multiple page entry
references produce an additional 3\%--7\%
improvement.\par
Much of the remaining 2D page walk overhead is due to
low-locality nested page entry references, which result
in additional memory hierarchy misses. By using large
pages, the hypervisor can eliminate many of these
long-latency accesses and further improve the guest
performance by 3\%--22\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "AMD; hypervisor; memory management; nested paging;
page walk caching; TLB; virtual machine monitor;
virtualization",
}
@Article{Lee:2008:ETL,
author = "Benjamin C. Lee and David Brooks",
title = "Efficiency trends and limits from comprehensive
microarchitectural adaptivity",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "36--47",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346288",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Increasing demand for power-efficient,
high-performance computing requires tuning applications
and/or the underlying hardware to improve the mapping
between workload heterogeneity and computational
resources. To assess the potential benefits of hardware
tuning, we propose a framework that leverages
synergistic interactions between recent advances in (a)
sampling, (b) predictive modeling, and (c) optimization
heuristics. This framework enables qualitatively new
capabilities in analyzing the performance and power
characteristics of adaptive microarchitectures. For the
first time, we are able to simultaneously consider high
temporal and comprehensive spatial adaptivity. In
particular, we optimize efficiency for many, short
adaptive intervals and identify the best configuration
of 15 parameters, which define a space of 240B
points.\par
With frequent sub-application reconfiguration and a
fully reconfigurable hardware substrate, adaptive
microarchitectures achieve bips$^3$/w efficiency gains
of up to 5.3x (median 2.4x) relative to their static
counterparts already optimized for a given application.
This 5.3x efficiency gain is derived from a 1.6x
performance gain and 0.8x power reduction. Although
several applications achieve a significant fraction of
their potential efficiency with as few as three
adaptive parameters, the three most significant
parameters differ across applications. These
differences motivate a hardware substrate capable of
comprehensive adaptivity to meet these diverse
application requirements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "adaptivity; efficiency; inference; microarchitecture;
performance; power; reconfigurability; regression;
simulation; statistics",
}
@Article{Raghavendra:2008:NPS,
author = "Ramya Raghavendra and Parthasarathy Ranganathan and
Vanish Talwar and Zhikui Wang and Xiaoyun Zhu",
title = "No `power' struggles: coordinated multi-level power
management for the data center",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "48--59",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346289",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Power delivery, electricity consumption, and heat
management are becoming key challenges in data center
environments. Several past solutions have individually
evaluated different techniques to address separate
aspects of this problem, in hardware and software, and
at local and global levels. Unfortunately, there has
been no corresponding work on coordinating all these
solutions. In the absence of such coordination, these
solutions are likely to interfere with one another, in
unpredictable (and potentially dangerous) ways. This
paper seeks to address this problem. We make two key
contributions. First, we propose and validate a power
management solution that coordinates different
individual approaches. Using simulations based on 180
server traces from nine different real-world
enterprises, we demonstrate the correctness, stability,
and efficiency advantages of our solution. Second,
using our unified architecture as the base, we perform
a detailed quantitative sensitivity analysis and draw
conclusions about the impact of different
architectures, implementations, workloads, and system
design choices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "capping; control theory; coordination; data center;
efficiency; power management; virtualization",
}
@Article{Ballapuram:2008:EAS,
author = "Chinnakrishnan S. Ballapuram and Ahmad Sharif and
Hsien-Hsin S. Lee",
title = "Exploiting access semantics and program behavior to
reduce snoop power in chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "60--69",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346290",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Integrating more processor cores on-die has become the
unanimous trend in the microprocessor industry. Most of
the current research thrusts using chip multiprocessors
(CMPs) as the baseline to analyze problems in various
domains. One of the main design issues facing CMP
systems is the growing number of snoops required to
maintain cache coherency and to support
self/cross-modifying code that leads to power and
performance limitations. In this paper, we analyze the
internal and external snoop behavior in a CMP system
and relax the snoopy cache coherence protocol based on
the program semantics and properties of the shared
variables for saving power. Based on the observations
and analyses, we propose two novel techniques:
Selective Snoop Probe (SSP) and Essential Snoop Probe
(ESP) to reduce power without compromising performance.
Our simulation results show that using the SSP
technique, 5\% to 65\% data cache energy savings per
core for different processor configurations can be
achieved with 1\% to 2\% performance improvement. We
also show that 5\% to 82\% of data cache energy per
core is spent on the non-essential snoop probes that
can be saved using the ESP technique.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; internal and external snoops;
MESI protocol; self-modifying code",
}
@Article{Mallik:2008:PMU,
author = "Arindam Mallik and Jack Cosgrove and Robert P. Dick
and Gokhan Memik and Peter Dinda",
title = "{PICSEL}: measuring user-perceived performance to
control dynamic frequency scaling",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "70--79",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346291",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The ultimate goal of a computer system is to satisfy
its users. The success of architectural or system-level
optimizations depends largely on having accurate
metrics for user satisfaction. We propose to derive
such metrics from information that is `close to flesh'
and apparent to the user rather than from information
that is `close to metal' and hidden from the user. We
describe and evaluate PICSEL, a dynamic voltage and
frequency scaling (DVFS) technique that uses
measurements of variations in the rate of change of a
computer's video output to estimate user-perceived
performance. Our adaptive algorithms, one conservative
and one aggressive, use these estimates to dramatically
reduce operating frequencies and voltages for
graphically-intensive applications while maintaining
performance at a satisfactory level for the user. We
evaluate PICSEL through user studies conducted on a
Pentium M laptop running Windows XP. Experiments
performed with 20 users executing three applications
indicate that the measured laptop power can be reduced
by up to 12.1\%, averaged across all of our users and
applications, compared to the default Windows XP DVFS
policy. User studies revealed that the difference in
overall user satisfaction between the more aggressive
version of PICSEL and Windows DVFS were statistically
insignificant, whereas the conservative version of
PICSEL actually improved user satisfaction when
compared to Windows DVFS.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic voltage and frequency scaling; power
management; thermal emergency; user-perceived
performance",
}
@Article{Joao:2008:IPO,
author = "Jose A. Joao and Onur Mutlu and Hyesoon Kim and Rishi
Agarwal and Yale N. Patt",
title = "Improving the performance of object-oriented languages
with dynamic predication of indirect jumps",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "80--90",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353535.1346293",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Indirect jump instructions are used to implement
increasingly-common programming constructs such as
virtual function calls, switch-case statements, jump
tables, and interface calls. The performance impact of
indirect jumps is likely to increase because indirect
jumps with multiple targets are difficult to predict
even with specialized hardware.\par
This paper proposes a new way of handling
hard-to-predict indirect jumps: dynamically predicating
them. The compiler (static or dynamic) identifies
indirect jumps that are suitable for predication along
with their control-flow merge (CFM) points. The
hardware predicates the instructions between different
targets of the jump and its CFM point if the jump turns
out to be hard-to-predict at run time. If the jump
would actually have been mispredicted, its dynamic
predication eliminates a pipeline flush, thereby
improving performance.\par
Our evaluations show that Dynamic Indirect jump
Predication (DIP) improves the performance of a set of
object-oriented applications including the Java DaCapo
benchmark suite by 37.8\% compared to a commonly-used
branch target buffer based predictor, while also
reducing energy consumption by 24.8\%. We compare DIP
to three previously proposed indirect jump predictors
and find that it provides the best performance and
energy-efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic predication; indirect jumps; object-oriented
languages; predicated execution; virtual functions",
}
@Article{Wegiel:2008:MCV,
author = "Michal Wegiel and Chandra Krintz",
title = "The mapping collector: virtual memory support for
generational, parallel, and concurrent compaction",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "91--102",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353535.1346294",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Parallel and concurrent garbage collectors are
increasingly employed by managed runtime environments
(MREs) to maintain scalability, as multi-core
architectures and multi-threaded applications become
pervasive. Moreover, state-of-the-art MREs commonly
implement compaction to eliminate heap fragmentation
and enable fast linear object allocation.\par
Our empirical analysis of object demographics reveals
that unreachable objects in the heap tend to form
clusters large enough to be effectively managed at the
granularity of virtual memory pages. Even though
processes can manipulate the mapping of the virtual
address space through the standard operating system
(OS) interface on most platforms, extant
parallel/concurrent compactors do not do so to exploit
this clustering behavior and instead achieve compaction
by performing, relatively expensive, object moving and
pointer adjustment.\par
We introduce the Mapping Collector (MC), which
leverages virtual memory operations to reclaim and
consolidate free space without moving objects and
updating pointers. MC is a nearly-single-phase
compactor that is simpler and more efficient than
previously reported compactors that comprise two to
four phases. Through effective MRE-OS coordination, MC
maintains the simplicity of a non-moving collector
while providing efficient parallel and concurrent
compaction.\par
We implement both stop-the-world and concurrent MC in a
generational garbage collection framework within the
open-source HotSpot Java Virtual Machine. Our
experimental evaluation using a multiprocessor
indicates that MC significantly increases throughput
and scalability as well as reduces pause times,
relative to state-of-the-art, parallel and concurrent
compactors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "compaction; concurrent; parallel; virtual memory",
}
@Article{Devietti:2008:HAS,
author = "Joe Devietti and Colin Blundell and Milo M. K. Martin
and Steve Zdancewic",
title = "{Hardbound}: architectural support for spatial safety
of the {C} programming language",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "103--114",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346295",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The C programming language is at least as well known
for its absence of spatial memory safety guarantees
(i.e., lack of bounds checking) as it is for its high
performance. C's unchecked pointer arithmetic and array
indexing allow simple programming mistakes to lead to
erroneous executions, silent data corruption, and
security vulnerabilities. Many prior proposals have
tackled enforcing spatial safety in C programs by
checking pointer and array accesses. However, existing
software-only proposals have significant drawbacks that
may prevent wide adoption, including: unacceptably high
run-time overheads, lack of completeness, incompatible
pointer representations, or need for non-trivial
changes to existing C source code and compiler
infrastructure.\par
Inspired by the promise of these software-only
approaches, this paper proposes a hardware bounded
pointer architectural primitive that supports
cooperative hardware/software enforcement of spatial
memory safety for C programs. This bounded pointer is a
new hardware primitive datatype for pointers that
leaves the standard C pointer representation intact,
but augments it with bounds information maintained
separately and invisibly by the hardware. The bounds
are initialized by the software, and they are then
propagated and enforced transparently by the hardware,
which automatically checks a pointer's bounds before it
is dereferenced. One mode of use requires instrumenting
only malloc, which enables enforcement of
per-allocation spatial safety for heap-allocated
objects for existing binaries. When combined with
simple intraprocedural compiler instrumentation,
hardware bounded pointers enable a low-overhead
approach for enforcing complete spatial memory safety
in unmodified C programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "C programming language; spatial memory safety",
}
@Article{Lvin:2008:ATA,
author = "Vitaliy B. Lvin and Gene Novark and Emery D. Berger
and Benjamin G. Zorn",
title = "{Archipelago}: trading address space for reliability
and security",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "115--124",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353535.1346296",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory errors are a notorious source of security
vulnerabilities that can lead to service interruptions,
information leakage and unauthorized access. Because
such errors are also difficult to debug, the absence of
timely patches can leave users vulnerable to attack for
long periods of time. A variety of approaches have been
introduced to combat these errors, but these often
incur large runtime overheads and generally abort on
errors, threatening availability.\par
This paper presents Archipelago, a runtime system that
takes advantage of available address space to
substantially reduce the likelihood that a memory error
will affect program execution. Archipelago randomly
allocates heap objects far apart in virtual address
space, effectively isolating each object from buffer
overflows. Archipelago also protects against dangling
pointer errors by preserving the contents of freed
objects after they are freed. Archipelago thus trades
virtual address space---a plentiful resource on 64-bit
systems---for significantly improved program
reliability and security, while limiting physical
memory consumption by tracking the working set of an
application and compacting cold objects. We show that
Archipelago allows applications to continue to run
correctly in the face of thousands of memory errors.
Across a suite of server applications, Archipelago's
performance overhead is 6\% on average (between $-$7\%
and 22\%), making it especially suitable to protect
servers that have known security vulnerabilities due to
heap memory errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Archipelago; buffer overflow; dynamic memory
allocation; memory errors; probabilistic memory safety;
randomized algorithms; virtual memory",
}
@Article{Choi:2008:ABP,
author = "Bumyong Choi and Leo Porter and Dean M. Tullsen",
title = "Accurate branch prediction for short threads",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "125--134",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346298",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Multi-core processors, with low communication costs
and high availability of execution cores, will increase
the use of execution and compilation models that use
short threads to expose parallelism. Current branch
predictors seek to incorporate large amounts of control
flow history to maximize accuracy. However, when that
history is absent the predictor fails to work as
intended. Thus, modern predictors are almost useless
for threads below a certain length.\par
Using a Speculative Multithreaded (SpMT) architecture
as an example of a system which generates shorter
threads, this work examines techniques to improve
branch prediction accuracy when a new thread begins to
execute on a different core. This paper proposes a
minor change to the branch predictor that gives
virtually the same performance on short threads as an
idealized predictor that incorporates unknowable
pre-history of a spawned speculative thread. At the
same time, strong performance on long threads is
preserved. The proposed technique sets the global
history register of the spawned thread to the initial
value of the program counter. This novel and simple
design reduces branch mispredicts by 29\% and provides
as much as a 13\% IPC improvement on selected SPEC2000
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "branch prediction; chip multiprocessors",
}
@Article{Srikantaiah:2008:ASP,
author = "Shekhar Srikantaiah and Mahmut Kandemir and Mary Jane
Irwin",
title = "Adaptive set pinning: managing shared caches in chip
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "135--144",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346299",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As part of the trend towards Chip Multiprocessors
(CMPs) for the next leap in computing performance, many
architectures have explored sharing the last level of
cache among different processors for better
performance-cost ratio and improved resource
allocation. Shared cache management is a crucial CMP
design aspect for the performance of the system. This
paper first presents a new classification of cache
misses---CII: Compulsory, Inter-processor and
Intra-processor misses---for CMPs with shared caches to
provide a better understanding of the interactions
between memory transactions of different processors at
the level of shared cache in a CMP. We then propose a
novel approach, called set pinning, for eliminating
inter-processor misses and reducing intra-processor
misses in a shared cache. Furthermore, we show that an
adaptive set pinning scheme improves over the benefits
obtained by the set pinning scheme by significantly
reducing the number of off-chip accesses. Extensive
analysis of these approaches with SPEComp 2001
benchmarks is performed using a full system simulator.
Our experiments indicate that the set pinning scheme
achieves an average improvement of 22.18\% in the L2
miss rate while the adaptive set pinning scheme reduces
the miss rates by an average of 47.94\% as compared to
the traditional shared cache scheme. They also improve
the performance by 7.24\% and 17.88\% respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "CMP; inter-processor; intra-processor; set pinning;
shared cache",
}
@Article{Tuck:2008:SSE,
author = "James Tuck and Wonsun Ahn and Luis Ceze and Josep
Torrellas",
title = "{SoftSig}: software-exposed hardware signatures for
code analysis and optimization",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "145--156",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346300",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many code analysis techniques for optimization,
debugging, or parallelization need to perform runtime
disambiguation of sets of addresses. Such operations
can be supported efficiently and with low complexity
with hardware signatures.\par
To enable flexible use of signatures, this paper
proposes to expose a Signature Register File to the
software through a rich ISA. The software has great
flexibility to decide, for each signature, which
addresses to collect and which addresses to
disambiguate against. We call this architecture
SoftSig. In addition, as an example of SoftSig use, we
show how to detect redundant function calls efficiently
and eliminate them dynamically. We call this algorithm
MemoiSE. On average for five popular applications,
MemoiSE reduces the number of dynamic instructions by
9.3\%, thereby reducing the execution time of the
applications by 9\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "memory disambiguation; multi-core architectures;
runtime optimization",
}
@Article{Burcea:2008:PV,
author = "Ioana Burcea and Stephen Somogyi and Andreas Moshovos
and Babak Falsafi",
title = "Predictor virtualization",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "157--167",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346301",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many hardware optimizations rely on collecting
information about program behavior at runtime. This
information is stored in lookup tables. To be accurate
and effective, these optimizations usually require
large dedicated on-chip tables. Although technology
advances offer an increased amount of on-chip
resources, these resources are allocated to increase
the size of on-chip conventional cache
hierarchies.\par
This work proposes Predictor Virtualization, a
technique that uses the existing memory hierarchy to
emulate large predictor tables. We demonstrate the
benefits of this technique by virtualizing a
state-of-the-art data prefetcher. Full-system,
cycle-accurate simulations demonstrate that the
virtualized prefetcher preserves the performance
benefits of the original design, while reducing the
on-chip storage dedicated to the predictor table from
60KB down to less than one kilobyte.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "caches; memory hierarchy; metadata; predictor
virtualization",
}
@Article{Ganapathy:2008:DIM,
author = "Vinod Ganapathy and Matthew J. Renzelmann and Arini
Balakrishnan and Michael M. Swift and Somesh Jha",
title = "The design and implementation of microdrivers",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "168--178",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346303",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Device drivers commonly execute in the kernel to
achieve high performance and easy access to kernel
services. However, this comes at the price of decreased
reliability and increased programming difficulty.
Driver programmers are unable to use user-mode
development tools and must instead use cumbersome
kernel tools. Faults in kernel drivers can cause the
entire operating system to crash. User-mode drivers
have long been seen as a solution to this problem, but
suffer from either poor performance or new interfaces
that require a rewrite of existing drivers.\par
This paper introduces the Microdrivers architecture
that achieves high performance and compatibility by
leaving critical path code in the kernel and moving the
rest of the driver code to a user-mode process. This
allows data-handling operations critical to I/O
performance to run at full speed, while management
operations such as initialization and configuration run
at reduced speed in user-level. To achieve
compatibility, we present DriverSlicer, a tool that
splits existing kernel drivers into a kernel-level
component and a user-level component using a small
number of programmer annotations. Experiments show that
as much as 65\% of driver code can be removed from the
kernel without affecting common-case performance, and
that only 1--6 percent of the code requires
annotations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "device drivers; program partitioning; reliability",
}
@Article{Weinsberg:2008:TFC,
author = "Yaron Weinsberg and Danny Dolev and Tal Anker and Muli
Ben-Yehuda and Pete Wyckoff",
title = "Tapping into the fountain of {CPUs}: on operating
system support for programmable devices",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "179--188",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346304",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The constant race for faster and more powerful CPUs is
drawing to a close. No longer is it feasible to
significantly increase the speed of the CPU without
paying a crushing penalty in power consumption and
production costs. Instead of increasing single thread
performance, the industry is turning to multiple CPU
threads or cores (such as SMT and CMP) and
heterogeneous CPU architectures (such as the Cell
Broadband Engine). While this is a step in the right
direction, in every modern PC there is a wealth of
untapped compute resources. The NIC has a CPU; the disk
controller is programmable; some high-end graphics
adapters are already more powerful than host CPUs. Some
of these CPUs can perform some functions more
efficiently than the host CPUs. Our operating systems
and programming abstractions should be expanded to let
applications tap into these computational resources and
make the best use of them.\par
Therefore, we propose the HYDRA framework, which lets
application developers use
the combined power of every compute resource in a
coherent way. HYDRA is a programming model and a
runtime support layer which enables utilization of host
processors as well as various programmable peripheral
devices' processors. We present the framework and its
application for a demonstrative use-case, as well as
provide a thorough evaluation of its capabilities.
Using HYDRA we were able to cut down the development
cost of a system that uses multiple heterogeneous
compute resources significantly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "offloading; operating systems; programming model",
}
@Article{Shen:2008:HCD,
author = "Kai Shen and Ming Zhong and Sandhya Dwarkadas and
Chuanpeng Li and Christopher Stewart and Xiao Zhang",
title = "Hardware counter driven on-the-fly request
signatures",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "189--200",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346306",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Today's processors provide a rich source of
statistical information on application execution
through hardware counters. In this paper, we explore
the utilization of these statistics as request
signatures in server applications for identifying
requests and inferring high-level request properties
(e.g., CPU and I/O resource needs). Our key finding is
that effective request signatures may be constructed
using a small amount of hardware statistics while the
request is still in an early stage of its execution.
Such on-the-fly request identification and property
inference allow guided operating system adaptation at
request granularity (e.g., resource-aware request
scheduling and on-the-fly request classification). We
address the challenges of selecting hardware counter
metrics for signature construction and providing
necessary operating system support for per-request
statistics management. Our implementation in the Linux
2.6.10 kernel suggests that our approach requires low
overhead suitable for runtime deployment. Our
on-the-fly request resource consumption inference
(averaging 7\%, 3\%, 20\%, and 41\% prediction errors
for four server workloads, TPC-C, TPC-H, J2EE-based
RUBiS, and a trace-driven index search, respectively)
is much more accurate than the online running-average
based prediction (73--82\% errors). Its use for
resource-aware request scheduling results in a 15--70\%
response time reduction for three CPU-bound
applications. Its use for on-the-fly request
classification and anomaly detection exhibits high
accuracy for the TPC-H workload with synthetically
generated anomalous requests following a typical
SQL-injection attack pattern.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "anomaly detection; hardware counter; operating system
adaptation; request classification; server system",
}
@Article{VanErtvelde:2008:DPA,
author = "Luk {Van Ertvelde} and Lieven Eeckhout",
title = "Dispersing proprietary applications as benchmarks
through code mutation",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "201--210",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346307",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Industry vendors hesitate to disseminate proprietary
applications to academia and third party vendors. By
consequence, the benchmarking process is typically
driven by standardized, open-source benchmarks which
may be very different from and likely not
representative of the real-life applications of
interest.\par
This paper proposes code mutation, a novel technique
that mutates a proprietary application to complicate
reverse engineering so that it can be distributed as a
benchmark. The benchmark mutant then serves as a proxy
for the proprietary application. The key idea in the
proposed code mutation approach is to preserve the
proprietary application's dynamic memory access and/or
control flow behavior in the benchmark mutant while
mutating the rest of the application code. To this end,
we compute program slices for memory access operations
and/or control flow operations trimmed through constant
value and branch profiles; and subsequently mutate the
instructions not appearing in these slices through
binary rewriting.\par
Our experimental results using SPEC CPU2000 and MiBench
benchmarks show that code mutation is a promising
technique that mutates up to 90\% of the static binary,
up to 50\% of the dynamically executed instructions,
and up to 35\% of the at run time exposed
inter-operation data dependencies. The performance
characteristics of the mutant are very similar to those
of the proprietary application across a wide range of
microarchitectures and hardware implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "benchmark generation; code mutation",
}
@Article{Mysore:2008:UVF,
author = "Shashidhar Mysore and Bita Mazloom and Banit Agrawal
and Timothy Sherwood",
title = "Understanding and visualizing full systems with data
flow tomography",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "211--221",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346308",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "It is not uncommon for modern systems to be composed
of a variety of interacting services, running across
multiple machines in such a way that most developers do
not really understand the whole system. As abstraction
is layered atop abstraction, developers gain the
ability to compose systems of extraordinary complexity
with relative ease. However, many software properties,
especially those that cut across abstraction layers,
become very difficult to understand in such
compositions. The communication patterns involved, the
privacy of critical data, and the provenance of
information, can be difficult to find and understand,
even with access to all of the source code. The goal of
Data Flow Tomography is to use the inherent information
flow of such systems to help visualize the interactions
between complex and interwoven components across
multiple layers of abstraction. In the same way that
the injection of short-lived radioactive isotopes help
doctors trace problems in the cardiovascular system,
the use of `data tagging' can help developers slice
through the extraneous layers of software and pin-point
those portions of the system interacting with the data
of interest. To demonstrate the feasibility of this
approach we have developed a prototype system in which
tags are tracked both through the machine and in
between machines over the network, and from which novel
visualizations of the whole system can be derived. We
describe the system-level challenges in creating a
working system tomography tool and we qualitatively
evaluate our system by examining several example real
world scenarios.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "data flow tracking; tomography; virtual machine",
}
@Article{Ottoni:2008:COG,
author = "Guilherme Ottoni and David I. August",
title = "Communication optimizations for global multi-threaded
instruction scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "222--232",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353535.1346310",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The recent shift in the industry towards chip
multiprocessor (CMP) designs has brought the need for
multi-threaded applications to mainstream computing. As
observed in several limit studies, most of the
parallelization opportunities require looking for
parallelism beyond local regions of code. To exploit
these opportunities, especially for sequential
applications, researchers have recently proposed global
multi-threaded instruction scheduling techniques,
including DSWP and GREMIO. These techniques
simultaneously schedule instructions from large regions
of code, such as arbitrary loop nests or whole
procedures, and have been shown to be effective at
extracting threads for many applications. A key enabler
of these global instruction scheduling techniques is
the Multi-Threaded Code Generation (MTCG) algorithm
proposed in [16], which generates multi-threaded code
for any partition of the instructions into threads.
This algorithm inserts communication and
synchronization instructions in order to satisfy all
inter-thread dependences.\par
In this paper, we present a general compiler framework,
COCO, to optimize the communication and synchronization
instructions inserted by the MTCG algorithm. This
framework, based on thread-aware data-flow analyses and
graph min-cut algorithms, appropriately models and
optimizes all kinds of inter-thread dependences,
including register, memory, and control dependences.
Our experiments, using a fully automatic compiler
implementation of these techniques, demonstrate
significant reductions (about 30\% on average) in the
number of dynamic communication instructions in code
parallelized with DSWP and GREMIO. This reduction in
communication translates to performance gains of up to
40\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "communication; data-flow analysis; graph min-cut;
instruction scheduling; multi-threading;
synchronization",
}
@Article{Kulkarni:2008:OPB,
author = "Milind Kulkarni and Keshav Pingali and Ganesh
Ramanarayanan and Bruce Walter and Kavita Bala and L.
Paul Chew",
title = "Optimistic parallelism benefits from data
partitioning",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "233--243",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353534.1346311",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent studies of irregular applications such as
finite-element mesh generators and data-clustering
codes have shown that these applications have a
generalized data parallelism arising from the use of
iterative algorithms that perform computations on
elements of worklists. In some irregular applications,
the computations on different elements are independent.
In other applications, there may be complex patterns of
dependences between these computations.\par
The Galois system was designed to exploit this kind of
irregular data parallelism on multicore processors. Its
main features are (i) two kinds of set iterators for
expressing worklist-based data parallelism, and (ii) a
runtime system that performs optimistic parallelization
of these iterators, detecting conflicts and rolling
back computations as needed. Detection of conflicts and
rolling back iterations requires information from class
implementors.\par
In this paper, we introduce mechanisms to improve the
execution efficiency of Galois programs: data
partitioning, data-centric work assignment, lock
coarsening, and over-decomposition. These mechanisms
can be used to exploit locality of reference, reduce
mis-speculation, and lower synchronization overhead. We
also argue that the design of the Galois system permits
these mechanisms to be used with relatively little
modification to the user code. Finally, we present
experimental results that demonstrate the utility of
these mechanisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "data partitioning; irregular programs; locality; lock
coarsening; optimistic parallelism;
over-decomposition",
}
@Article{Cox:2008:XEO,
author = "Russ Cox and Tom Bergan and Austin T. Clements and
Frans Kaashoek and Eddie Kohler",
title = "{Xoc}, an extension-oriented compiler for systems
programming",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "244--254",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353535.1346312",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Today's system programmers go to great lengths to
extend the languages in which they program. For
instance, system-specific compilers find errors in
Linux and other systems, and add support for
specialized control flow to Qt and event-based
programs. These compilers are difficult to build and
cannot always understand each other's language changes.
However, they can greatly improve code
understandability and correctness, advantages that
should be accessible to all programmers.\par
We describe an extension-oriented compiler for C called
xoc. An extension-oriented compiler, unlike a
conventional extensible compiler, implements new
features via many small extensions that are loaded
together as needed. Xoc gives extension writers full
control over program syntax and semantics while hiding
many compiler internals. Xoc programmers concisely
define powerful compiler extensions that, by
construction, can be combined; even some parts of the
base compiler, such as GNU C compatibility, are
structured as extensions.\par
Xoc is based on two key interfaces. Syntax patterns
allow extension writers to manipulate language
fragments using concrete syntax. Lazy computation of
attributes allows extension writers to use the results
of analyses by other extensions or the core without
needing to worry about pass scheduling.\par
Extensions built using xoc include xsparse, a 345-line
extension that mimics Sparse, Linux's C front end, and
xlambda, a 170-line extension that adds function
expressions to C. An evaluation of xoc using these and
13 other extensions shows that xoc extensions are
typically more concise than equivalent extensions
written for conventional extensible compilers and that
it is possible to compose extensions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "extension-oriented compilers",
}
@Article{Wells:2008:AIF,
author = "Philip M. Wells and Koushik Chakraborty and Gurindar
S. Sohi",
title = "Adapting to intermittent faults in multicore systems",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "255--264",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353536.1346314",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Future multicore processors will be more susceptible
to a variety of hardware failures. In particular,
intermittent faults, caused in part by manufacturing,
thermal, and voltage variations, can cause bursts of
frequent faults that last from several cycles to
several seconds or more. Due to practical limitations
of circuit techniques, cost-effective reliability will
likely require the ability to temporarily suspend
execution on a core during periods of intermittent
faults.\par
We investigate three of the most obvious techniques for
adapting to the dynamically changing resource
availability caused by intermittent faults, and
demonstrate their different system-level implications.
We show that system software reconfiguration has very
high overhead, that temporarily pausing execution on a
faulty core can lead to cascading livelock, and that
using spare cores has high fault-free cost. To remedy
these and other drawbacks of the three baseline
techniques, we propose using a thin hardware/firmware
layer to manage an overcommitted system -- one where
the OS is configured to use more virtual processors
than the number of currently available physical cores.
We show that this proposed technique can gracefully
degrade performance during intermittent faults of
various duration with low overhead, without involving
system software, and without requiring spare cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "intermittent faults; overcommitted system",
}
@Article{Li:2008:UPH,
author = "Man-Lap Li and Pradeep Ramachandran and Swarup Kumar
Sahoo and Sarita V. Adve and Vikram S. Adve and
Yuanyuan Zhou",
title = "Understanding the propagation of hard errors to
software and implications for resilient system design",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "265--276",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346315",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With continued CMOS scaling, future shipped hardware
will be increasingly vulnerable to in-the-field faults.
To be broadly deployable, the hardware reliability
solution must incur low overheads, precluding use of
expensive redundancy. We explore a cooperative
hardware-software solution that watches for anomalous
software behavior to indicate the presence of hardware
faults. Fundamental to such a solution is a
characterization of how hardware faults in different
microarchitectural structures of a modern processor
propagate through the application and OS.\par
This paper aims to provide such a characterization,
resulting in identifying low-cost detection methods and
providing guidelines for implementation of the recovery
and diagnosis components of such a reliability
solution. We focus on hard faults because they are
increasingly important and have different system
implications than the much studied transients. We
achieve our goals through fault injection experiments
with a microarchitecture-level full system timing
simulator. Our main results are: (1) we are able to
detect 95\% of the unmasked faults in 7 out of 8
studied microarchitectural structures with simple
detectors that incur zero to little hardware overhead;
(2) over 86\% of these detections are within latencies
that existing hardware checkpointing schemes can
handle, while others require software checkpointing;
and (3) a surprisingly large fraction of the detected
faults corrupt OS state, but almost all of these are
detected with latencies short enough to use hardware
checkpointing, thereby enabling OS recovery in
virtually all such cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architecture; error detection; fault injection;
permanent fault",
}
@Article{Suleman:2008:FDT,
author = "M. Aater Suleman and Moinuddin K. Qureshi and Yale N.
Patt",
title = "Feedback-driven threading: power-efficient and
high-performance execution of multi-threaded workloads
on {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "277--286",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346317",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Extracting high-performance from the emerging Chip
Multiprocessors (CMPs) requires that the application be
divided into multiple threads. Each thread executes on
a separate core thereby increasing concurrency and
improving performance. As the number of cores on a CMP
continues to increase, the performance of some
multi-threaded applications will benefit from the
increased number of threads, whereas, the performance
of other multi-threaded applications will become
limited by data-synchronization and off-chip bandwidth.
For applications that get limited by
data-synchronization, increasing the number of threads
significantly degrades performance and increases
on-chip power. Similarly, for applications that get
limited by off-chip bandwidth, increasing the number of
threads increases on-chip power without providing any
performance improvement. Furthermore, whether an
application gets limited by data-synchronization, or
bandwidth, or neither depends not only on the
application but also on the input set and the machine
configuration. Therefore, controlling the number of
threads based on the run-time behavior of the
application can significantly improve performance and
reduce power.\par
This paper proposes Feedback-Driven Threading (FDT), a
framework to dynamically control the number of threads
using run-time information. FDT can be used to
implement Synchronization-Aware Threading (SAT), which
predicts the optimal number of threads depending on the
amount of data-synchronization. Our evaluation shows
that SAT can reduce both execution time and power by up
to 66\% and 78\% respectively. Similarly, FDT can be
used to implement Bandwidth-Aware Threading (BAT),
which predicts the minimum number of threads required
to saturate the off-chip bus. Our evaluation shows that
BAT reduces on-chip power by up to 78\%. When SAT and
BAT are combined, the average execution time reduces by
17\% and power reduces by 59\%. The proposed techniques
leverage existing performance counters and require
minimal support from the threading library.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bandwidth; CMP; multi-threaded; synchronization",
}
@Article{Linderman:2008:MPM,
author = "Michael D. Linderman and Jamison D. Collins and Hong
Wang and Teresa H. Meng",
title = "{Merge}: a programming model for heterogeneous
multi-core systems",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "287--296",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346318",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper we propose the Merge framework, a
general purpose programming model for heterogeneous
multi-core systems. The Merge framework replaces
current ad hoc approaches to parallel programming on
heterogeneous platforms with a rigorous, library-based
methodology that can automatically distribute
computation across heterogeneous cores to achieve
increased energy and performance efficiency. The Merge
framework provides (1) a predicate dispatch-based
library system for managing and invoking function
variants for multiple architectures; (2) a high-level,
library-oriented parallel language based on map-reduce;
and (3) a compiler and runtime which implement the
map-reduce language pattern by dynamically selecting
the best available function implementations for a given
input and machine configuration. Using a generic
sequencer architecture interface for heterogeneous
accelerators, the Merge framework can integrate
function variants for specialized accelerators,
offering the potential for to-the-metal performance for
a wide range of heterogeneous architectures, all
transparent to the user. The Merge framework has been
prototyped on a heterogeneous platform consisting of an
Intel Core 2 Duo CPU and an 8-core 32-thread Intel
Graphics and Media Accelerator X3000, and a homogeneous
32-way Unisys SMP system with Intel Xeon processors. We
implemented a set of benchmarks using the Merge
framework and enhanced the library with X3000 specific
implementations, achieving speedups of 3.6x -- 8.5x
using the X3000 and 5.2x -- 22x using the 32-way system
relative to the straight C reference implementation on
a single IA32 core.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "GPGPU; heterogeneous multi-core; predicate dispatch",
}
@Article{Gummaraju:2008:SPG,
author = "Jayanth Gummaraju and Joel Coburn and Yoshio Turner
and Mendel Rosenblum",
title = "{Streamware}: programming general-purpose multicore
processors using streams",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "297--307",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346319",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recently, the number of cores on general-purpose
processors has been increasing rapidly. Using
conventional programming models, it is challenging to
effectively exploit these cores for maximal
performance. An interesting alternative candidate for
programming multiple cores is the stream programming
model, which provides a framework for writing programs
in a sequential-style while greatly simplifying the
task of automatic parallelization. It has been shown
that not only traditional media/image applications but
also more general-purpose data-intensive applications
can be expressed in the stream programming
style.\par
In this paper, we investigate the potential to use the
stream programming model to efficiently utilize
commodity multicore general-purpose processors (e.g.,
Intel/AMD). Although several stream languages and
stream compilers have recently been developed, they
typically target special-purpose stream processors. In
contrast, we propose a flexible software system,
Streamware, which automatically maps stream programs
onto a wide variety of general-purpose multicore
processor configurations. We leverage existing
compilation framework for stream processors and design
a runtime environment which takes as input the output
of these stream compilers in the form of
machine-independent stream virtual machine code. The
runtime environment assigns work to processor cores
considering processor/cache configurations and adapts
to workload variations. We evaluate this approach for a
few general-purpose scientific applications on real
hardware and a cycle-level simulator set-up to showcase
scaling and contention issues. The results show that
the stream programming model is a good choice for
efficiently exploiting modern and future multicore CPUs
for an important class of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "general-purpose multicore processors; programming;
runtime system; streams",
}
@Article{Nightingale:2008:PSC,
author = "Edmund B. Nightingale and Daniel Peek and Peter M.
Chen and Jason Flinn",
title = "Parallelizing security checks on commodity hardware",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "308--318",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346321",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Speck (Speculative Parallel Check) is a system that
accelerates powerful security checks on commodity
hardware by executing them in parallel on multiple
cores. Speck provides an infrastructure that allows
sequential invocations of a particular security check
to run in parallel without sacrificing the safety of
the system. Speck creates parallelism in two ways.
First, Speck decouples a security check from an
application by continuing the application, using
speculative execution, while the security check
executes in parallel on another core. Second, Speck
creates parallelism between sequential invocations of a
security check by running later checks in parallel with
earlier ones. Speck provides a process-level replay
system to deterministically and efficiently synchronize
state between a security check and the original
process. We use Speck to parallelize three security
checks: sensitive data analysis, on-access virus
scanning, and taint propagation. Running on a 4-core
and an 8-core computer, Speck improves performance 4x
and 7.5x for the sensitive data analysis check, 3.3x
and 2.8x for the on-access virus scanning check, and
1.6x and 2x for the taint propagation check.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "operating systems; parallel; performance; security;
speculative execution",
}
@Article{Castro:2008:BBR,
author = "Miguel Castro and Manuel Costa and Jean-Philippe
Martin",
title = "Better bug reporting with better privacy",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "319--328",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1346281.1346322",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software vendors collect bug reports from customers to
improve the quality of their software. These reports
should include the inputs that make the software fail,
to enable vendors to reproduce the bug. However,
vendors rarely include these inputs in reports because
they may contain private user data. We describe a
solution to this problem that provides software vendors
with new input values that satisfy the conditions
required to make the software follow the same execution
path until it fails, but are otherwise unrelated with
the original inputs. These new inputs allow vendors to
reproduce the bug while revealing less private
information than existing approaches. Additionally, we
provide a mechanism to measure the amount of
information revealed in an error report. This mechanism
allows users to perform informed decisions on whether
or not to submit reports. We implemented a prototype of
our solution and evaluated it with real errors in real
programs. The results show that we can produce error
reports that allow software vendors to reproduce bugs
while revealing almost no private information.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bug reports; constraint solving; privacy; symbolic
execution",
}
@Article{Lu:2008:LMC,
author = "Shan Lu and Soyeon Park and Eunsoo Seo and Yuanyuan
Zhou",
title = "Learning from mistakes: a comprehensive study on real
world concurrency bug characteristics",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "1",
pages = "329--339",
month = mar,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1353536.1346323",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jun 17 11:51:35 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The reality of multi-core hardware has made concurrent
programs pervasive. Unfortunately, writing correct
concurrent programs is difficult. Addressing this
challenge requires advances in multiple directions,
including concurrency bug detection, concurrent program
testing, concurrent programming model design, etc.
Designing effective techniques in all these directions
will significantly benefit from a deep understanding of
real world concurrency bug characteristics.\par
This paper provides the first (to the best of our
knowledge) comprehensive real world concurrency bug
characteristic study. Specifically, we have carefully
examined concurrency bug patterns, manifestation, and
fix strategies of 105 randomly selected real world
concurrency bugs from 4 representative server and
client open-source applications (MySQL, Apache, Mozilla
and OpenOffice). Our study reveals several interesting
findings and provides useful guidance for concurrency
bug detection, testing, and concurrent programming
language design.\par
Some of our findings are as follows: (1) Around one
third of the examined non-deadlock concurrency bugs are
caused by violation to programmers' order intentions,
which may not be easily expressed via synchronization
primitives like locks and transactional memories; (2)
Around 34\% of the examined non-deadlock concurrency
bugs involve multiple variables, which are not well
addressed by existing bug detection tools; (3) About
92\% of the examined concurrency bugs can be reliably
triggered by enforcing certain orders among no more
than 4 memory accesses. This indicates that testing
concurrent programs can target at exploring possible
orders among every small groups of memory accesses,
instead of among all memory accesses; (4) About 73\% of
the examined non-deadlock concurrency bugs were not
fixed by simply adding or changing locks, and many of
the fixes were not correct at the first try, indicating
the difficulty of reasoning concurrent execution by
programmers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bug characteristics; concurrency bug; concurrent
program",
}
@Article{Anonymous:2008:MGC,
author = "Anonymous",
title = "Message from the {General Chairs}",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "x--x",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382166",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2008:MPC,
author = "Anonymous",
title = "Message from the {Program Chair}",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "xi--xi",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382167",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Anonymous:2008:R,
author = "Anonymous",
title = "Reviewers",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "xv--xviii",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382168",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tseng:2008:AOP,
author = "Francis Tseng and Yale N. Patt",
title = "Achieving Out-of-Order Performance with Almost
In-Order Complexity",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "3--12",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382169",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "There is still much performance to be gained by
out-of-order processors with wider issue widths.
However, traditional methods of increasing issue width
do not scale; that is, they drastically increase design
complexity and power requirements. This paper
introduces the braid, a compile-time identified entity
that enables the execution core to scale to wider
widths by exploiting the small fanout and short
lifetime of values produced by the program. Braid
processing requires identification by the compiler,
minor extensions to the ISA, and support by the
microarchitecture. The result from processing braids is
performance within 9\% of a very aggressive
conventional out-of-order microarchitecture with almost
the complexity of an in-order implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:2008:FCR,
author = "Mayank Agarwal and Nitin Navale and Kshitiz Malik and
Matthew I. Frank",
title = "Fetch-Criticality Reduction through Control
Independence",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "13--24",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.39",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Architectures that exploit control independence (CI)
promise to remove in-order fetch bottlenecks, like
branch mispredicts, instruction-cache misses and fetch
unit stalls, from the critical path of single-threaded
execution. By exposing more fetch options, however, CI
architectures also expose more performance tradeoffs.
These tradeoffs make it hard to design policies that
deliver good performance. This paper presents a
criticality-based model for reasoning about CI
architectures, and uses that model to describe the
tradeoffs between gains from control independence
versus increased costs of honoring data dependences.
The model is then used to derive the design of a
criticality-aware task selection policy that strikes
the right balance between fetch-criticality and
execute-criticality. Finally, the paper validates the
model by attacking branch-misprediction induced
fetch-criticality through the above derived spawn
policy. This leads to as high as 100\% improvements in
performance, and in the region of 40\% or more
improvements for four of the benchmarks where this is
the main problem. Criticality analysis shows that this
improvement arises due to reduced fetch-criticality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "control independence; fetch-criticality; implicit
parallelization",
}
@Article{Pericas:2008:TLL,
author = "Miquel Peric{\`a}s and Adrian Cristal and Francisco J.
Cazorla and Ruben Gonz{\'a}lez and Alex Veidenbaum and
Daniel A. Jim{\'e}nez and Mateo Valero",
title = "A Two-Level Load\slash Store Queue Based on Execution
Locality",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "25--36",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.10",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Multicore processors have emerged as a powerful
platform on which to efficiently exploit thread-level
parallelism (TLP). However, due to Amdahl's Law, such
designs will be increasingly limited by the remaining
sequential components of applications. To overcome this
limitation it is necessary to design processors with
many lower-performance cores for TLP and some
high-performance cores designed to execute sequential
algorithms. Such cores will need to address the
memory-wall by implementing kilo-instruction windows.
Large window processors require large Load/Store Queues
that would be too slow if implemented using current
CAM-based designs. This paper proposes an Epoch-based
Load Store Queue (ELSQ), a new design based on
Execution Locality. It is integrated into a
large-window processor that has a fast, out-of-order
core operating only on L1/L2 cache hits and N slower
cores that process L2 misses and their dependent
instructions. The large LSQ is coupled with the slow
cores and is partitioned into N small and local LSQs,
one per core. We evaluate ELSQ in a large-window
environment, finding that it enables high performance
at low power. By exploiting locality among loads and
stores, ELSQ outperforms even an idealized central LSQ
when implemented on top of a decoupled processor
design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "execution locality; kilo-instruction processors;
load/store queue; power-efficiency",
}
@Article{Ipek:2008:SOM,
author = "Engin Ipek and Onur Mutlu and Jos{\'e} F.
Mart{\'\i}nez and Rich Caruana",
title = "Self-Optimizing Memory Controllers: a Reinforcement
Learning Approach",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "39--50",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382172",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Efficiently utilizing off-chip DRAM bandwidth is a
critical issue in designing cost-effective,
high-performance chip multiprocessors (CMPs).
Conventional memory controllers deliver relatively low
performance in part because they often employ
fixed, rigid access scheduling policies designed for
average-case application behavior. As a result, they
cannot learn and optimize the long-term performance
impact of their scheduling decisions, and cannot adapt
their scheduling policies to dynamic workload behavior.
We propose a new, self-optimizing memory controller
design that operates using the principles of
reinforcement learning (RL) to overcome these
limitations. Our RL-based memory controller observes
the system state and estimates the long-term
performance impact of each action it can take. In this
way, the controller learns to optimize its scheduling
policy on the fly to maximize long-term performance.
Our results show that an RL-based memory controller
improves the performance of a set of parallel
applications run on a 4-core CMP by 19\% on average
(up to 33\%), and it improves DRAM bandwidth utilization
by 22\% compared to a state-of-the-art controller.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; machine learning; memory
controller; memory systems; reinforcement learning",
}
@Article{Thoziyoor:2008:CMM,
author = "Shyamkumar Thoziyoor and Jung Ho Ahn and Matteo
Monchiero and Jay B. Brockman and Norman P. Jouppi",
title = "A Comprehensive Memory Modeling Tool and Its
Application to the Design and Analysis of Future Memory
Hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "51--62",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.16",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper we introduce CACTI-D, a significant
enhancement of CACTI 5.0. CACTI-D adds support for
modeling of commodity DRAM technology and support for
main memory DRAM chip organization. CACTI-D enables
modeling of the complete memory hierarchy with
consistent models all the way from SRAM based L1 caches
through main memory DRAMs on DIMMs. We illustrate the
potential applicability of CACTI-D in the design and
analysis of future memory hierarchies by carrying out a
last level cache study for a multicore multithreaded
architecture at the 32nm technology node. In this study
we use CACTI-D to model all components of the memory
hierarchy including L1, L2, last level SRAM, logic
process based DRAM or commodity DRAM L3 caches, and
main memory DRAM chips. We carry out architectural
simulation using benchmarks with large data sets and
present results of their execution time, breakdown of
power in the memory hierarchy, and system energy-delay
product for the different system configurations. We
find that commodity DRAM technology is most attractive
for stacked last level caches, with significantly lower
energy-delay products.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache; CACTI; commodity DRAM; LLC; logic-process based
DRAM; SRAM",
}
@Article{Mutlu:2008:PAB,
author = "Onur Mutlu and Thomas Moscibroda",
title = "Parallelism-Aware Batch Scheduling: Enhancing both
Performance and Fairness of Shared {DRAM} Systems",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "63--74",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382128",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In a chip-multiprocessor (CMP) system, the DRAM system
is shared among cores. In a shared DRAM system,
requests from a thread can not only delay requests from
other threads by causing bank/bus/row-buffer conflicts
but they can also destroy other threads'
DRAM-bank-level parallelism. Requests whose latencies
would otherwise have been overlapped could effectively
become serialized. As a result both fairness and system
throughput degrade, and some threads can starve for
long time periods. This paper proposes a fundamentally
new approach to designing a shared DRAM controller that
provides quality of service to threads, while also
improving system throughput. Our parallelism-aware
batch scheduler (PAR-BS) design is based on two key
ideas. First, PAR-BS processes DRAM requests in batches
to provide fairness and to avoid starvation of
requests. Second, to optimize system throughput, PAR-BS
employs a parallelism-aware DRAM scheduling policy that
aims to process requests from a thread in parallel in
the DRAM banks, thereby reducing the memory-related
stall-time experienced by the thread. PAR-BS seamlessly
incorporates support for system-level thread priorities
and can provide different service levels, including
purely opportunistic service, to threads with different
priorities. We evaluate the design trade-offs involved
in PAR-BS and compare it to four previously proposed
DRAM scheduler designs on 4-, 8-, and 16-core systems.
Our evaluations show that, averaged over 100 4-core
workloads, PAR-BS improves fairness by 1.11X and system
throughput by 8.3\% compared to the best previous
scheduling technique, Stall-Time Fair Memory (STFM)
scheduling. Based on simple request prioritization
rules, PAR-BS is also simpler to implement than STFM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; DRAM systems; fairness; memory
scheduling; memory systems; memory-level parallelism;
multi-core systems; quality of service",
}
@Article{Kim:2008:TDH,
author = "John Kim and William J. Dally and Steve Scott and
Dennis Abts",
title = "Technology-Driven, Highly-Scalable {Dragonfly}
Topology",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "77--88",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.19",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Evolving technology and increasing pin-bandwidth
motivate the use of high-radix routers to reduce the
diameter, latency, and cost of interconnection
networks. High-radix networks, however, require longer
cables than their low-radix counterparts. Because
cables dominate network cost, the number of cables, and
particularly the number of long, global cables should
be minimized to realize an efficient network. In this
paper, we introduce the dragonfly topology which uses a
group of high-radix routers as a virtual router to
increase the effective radix of the network. With this
organization, each minimally routed packet traverses at
most one global channel. By reducing global channels, a
dragonfly reduces cost by 20\% compared to a flattened
butterfly and by 52\% compared to a folded Clos network
in configurations with $ \geq $ 16K nodes. We also
introduce two new variants of global adaptive routing
that enable load-balanced routing in the dragonfly.
Each router in a dragonfly must make an adaptive
routing decision based on the state of a global channel
connected to a different router. Because of the
indirect nature of this routing decision, conventional
adaptive routing algorithms give degraded performance.
We introduce the use of selective virtual-channel
discrimination and the use of credit round-trip latency
to both sense and signal channel congestion. The
combination of these two methods gives throughput and
latency that approaches that of an ideal adaptive
routing algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dragonfly; interconnection networks; topology",
}
@Article{Lee:2008:GSF,
author = "Jae W. Lee and Man Cheuk Ng and Krste Asanovic",
title = "Globally-Synchronized Frames for Guaranteed
Quality-of-Service in On-Chip Networks",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "89--100",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382130",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Future chip multiprocessors (CMPs) may have hundreds
to thousands of threads competing to access shared
resources, and will require quality-of-service (QoS)
support to improve system utilization. Although there
has been significant work in QoS support within
resources such as caches and memory controllers, there
has been less attention paid to QoS support in the
multi-hop on-chip networks that will form an important
component in future systems. In this paper we introduce
Globally-Synchronized Frames (GSF), a framework for
providing guaranteed QoS in on-chip networks in terms
of minimum bandwidth and a maximum delay bound. The GSF
framework can be easily integrated in a conventional
virtual channel (VC) router without significantly
increasing the hardware complexity. We rely on a fast
barrier network, which is feasible in an on-chip
environment, to efficiently implement GSF. Performance
guarantees are verified by both analysis and
simulation. According to our simulations, all
concurrent flows receive their guaranteed minimum share
of bandwidth in compliance with a given bandwidth
allocation. The average throughput degradation of GSF
on a 8x8 mesh network is within 10\% compared to the
conventional best-effort VC router in most cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; interconnects; multicores;
on-chip network; quality-of-service; resource
management; router; software interface",
}
@Article{Kim:2008:PCN,
author = "Martha Mercaldi Kim and John D. Davis and Mark Oskin
and Todd Austin",
title = "Polymorphic On-Chip Networks",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "101--112",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.25",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As the number of cores per die increases, be they
processors, memory blocks, or custom accelerators, the
on-chip interconnect the cores use to communicate gains
importance. We begin this study with an
area-performance analysis of the interconnect design
space. We find that there is no single network design
that yields optimal performance across a range of
traffic patterns. This indicates that there is an
opportunity to gain performance by customizing the
interconnect to a particular application or workload.
We propose polymorphic on-chip networks to enable
per-application network customization. This network can
be configured prior to application runtime, to have the
topology and buffering of arbitrary network designs.
This paper proposes one such polymorphic network
architecture. We demonstrate its modes of
configurability, and evaluate the polymorphic network
architecture design space, producing polymorphic
fabrics that minimize the network area overhead.
Finally, we expand the network on chip design space to
include a polymorphic network design, showing that a
single polymorphic network is capable of implementing
all of the pareto optimal fixed-network designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "configurable hardware; on-chip network",
}
@Article{Baugh:2008:UHM,
author = "Lee Baugh and Naveen Neelakantam and Craig Zilles",
title = "Using Hardware Memory Protection to Build a
High-Performance, Strongly-Atomic Hybrid Transactional
Memory",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "115--126",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382132",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We demonstrate how fine-grained memory protection can
be used in support of transactional memory systems:
first showing how a software transactional memory
system (STM) can be made strongly atomic by using
memory protection on transactionally-held state, then
showing how such a strongly-atomic STM can be used with
a bounded hardware TM system to build a hybrid TM
system in which zero-overhead hardware transactions may
safely run concurrently with potentially-conflicting
software transactions. We experimentally demonstrate
how this hybrid TM organization avoids the common-case
overheads associated with previous hybrid TM proposals,
achieving performance rivaling an unbounded HTM system
without the hardware complexity of ensuring completion
of arbitrary transactions in hardware. As part of our
findings, we identify key policies regarding contention
management within and across the hardware and software
TM components that are key to achieving robust
performance with a hybrid TM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "abort handler; hybrid; memory protection; primitives;
strong atomicity; transactional memory",
}
@Article{Bobba:2008:TEE,
author = "Jayaram Bobba and Neelam Goyal and Mark D. Hill and
Michael M. Swift and David A. Wood",
title = "{TokenTM}: Efficient Execution of Large Transactions
with Hardware Transactional Memory",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "127--138",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382133",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Current hardware transactional memory systems seek to
simplify parallel programming, but assume that large
transactions are rare, so it is acceptable to penalize
their performance or concurrency. However, future
programmers may wish to use large transactions more
often in order to integrate with higher-level
programming models (e.g., database transactions) or
perform selected I/O operations. To prevent the `small
transactions are common' assumption from becoming
self-fulfilling, this paper contributes TokenTM --- an
unbounded HTM that uses the abstraction of tokens to
precisely track conflicts on an unbounded number of
memory blocks. TokenTM implements tokens with new
mechanisms, including metastate fission/fusion and fast
token release. TokenTM executes small transactions
fast, executes concurrent large transactions with no
penalty to nonconflicting transactions, and gracefully
handles paging, context switching, and System-V-style
shared memory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "coherence protocols; hardware transactional memory;
metastates; tokens; transactional memory; unbounded
transactions",
}
@Article{Shriraman:2008:FDT,
author = "Arrvindh Shriraman and Sandhya Dwarkadas and Michael
L. Scott",
title = "Flexible Decoupled Transactional Memory Support",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "139--150",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.17",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A high-concurrency transactional memory (TM)
implementation needs to track concurrent accesses,
buffer speculative updates, and manage conflicts. We
present a system, FlexTM (FLEXible Transactional
Memory), that coordinates four decoupled hardware
mechanisms: read and write signatures, which summarize
per-thread access sets; per-thread conflict summary
tables (CSTs), which identify the threads with which
conflicts have occurred; Programmable Data Isolation,
which maintains speculative updates in the local cache
and employs a thread-private buffer (in virtual memory)
in the rare event of overflow; and Alert-On-Update,
which selectively notifies threads about coherence
events. All mechanisms are software-accessible, to
enable virtualization and to support transactions of
arbitrary length. FlexTM allows software to determine
when to manage conflicts (either eagerly or lazily),
and to employ a variety of conflict management and
commit protocols. We describe an STM-inspired protocol
that uses CSTs to manage conflicts in a distributed
manner (no global arbitration) and allows parallel
commits. In experiments with a prototype on
Simics/GEMS, FlexTM exhibits 5x speedup over
high-quality software TM, with no loss in policy
flexibility. Its distributed commit protocol is also
more efficient than a central hardware manager. Our
results highlight the importance of flexibility in
determining when to manage conflicts: lazy maximizes
concurrency and helps to ensure forward progress while
eager provides better overall utilization in a
multi-programmed system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache coherence; Conflict detection; FlexTM; Hardware;
Multiprocessors; RTM; Transactional memory",
}
@Article{Vantrease:2008:CSI,
author = "Dana Vantrease and Robert Schreiber and Matteo
Monchiero and Moray McLaren and Norman P. Jouppi and
Marco Fiorentino and Al Davis and Nathan Binkert and
Raymond G. Beausoleil and Jung Ho Ahn",
title = "{Corona}: System Implications of Emerging Nanophotonic
Technology",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "153--164",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382135",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We expect that many-core microprocessors will push
performance per chip from the 10 gigaflop to the 10
teraflop range in the coming decade. To support this
increased performance, memory and inter-core bandwidths
will also have to scale by orders of magnitude. Pin
limitations, the energy cost of electrical signaling,
and the non-scalability of chip-length global wires are
significant bandwidth impediments. Recent developments
in silicon nanophotonic technology have the potential
to meet these off- and on-stack bandwidth requirements
at acceptable power levels. Corona is a 3D many-core
architecture that uses nanophotonic communication for
both inter-core communication and off-stack
communication to memory or I/O devices. Its peak
floating-point performance is 10 teraflops. Dense
wavelength division multiplexed optically connected
memory modules provide 10 terabyte per second memory
bandwidth. A photonic crossbar fully interconnects its
256 low-power multithreaded cores at 20 terabyte per
second bandwidth. We have simulated a 1024 thread
Corona system running synthetic benchmarks and scaled
versions of the SPLASH-2 benchmark suite. We believe
that in comparison with an electrically-connected
many-core alternative that uses the same on-stack
interconnect power, Corona can provide 2 to 6 times
more performance on many memory intensive workloads,
while simultaneously reducing power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "3D stacking; many-core CMP; nanophotonics; on-chip
Networks",
}
@Article{Kreger-Stickles:2008:MAI,
author = "Lucas Kreger-Stickles and Mark Oskin",
title = "Microcoded Architectures for Ion-Tap Quantum
Computers",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "165--176",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382136",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper we present the first ever systematic
design space exploration of microcoded software fault
tolerant ion-trap quantum computers. This exploration
reveals the critical importance of a well-tuned
microcode for providing high performance and ensuring
system reliability. In addition, we find that, despite
recent advances in the reliability of quantum memory,
the impact of errors due to stored quantum data is now,
and will continue to be, a major source of systemic
error. Finally, our exploration reveals a single design
which out performs all others we considered in run
time, fidelity and area. For completeness our design
space exploration includes designs from prior work and
we find a novel design that is 1/2 the size, 3 times as
fast, and an order of magnitude more reliable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architecture; ion-trap; microcoded; quantum",
}
@Article{Isailovic:2008:RQC,
author = "Nemanja Isailovic and Mark Whitney and Yatish Patel
and John Kubiatowicz",
title = "Running a Quantum Circuit at the Speed of Data",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "177--188",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382137",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We analyze circuits for kernels from popular quantum
computing applications, characterizing the hardware
resources necessary to take ancilla preparation off the
critical path. The result is a chip entirely dominated
by ancilla generation circuits. To address this issue,
we introduce optimized ancilla factories and analyze
their structure and physical layout for ion trap
technology. We introduce a new quantum computing
architecture with highly concentrated data-only regions
surrounded by shared ancilla factories. The results are
a reduced dependence on costly teleportation, more
efficient distribution of generated ancillae and more
than five times speedup over previous proposals.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "ancilla factory; microarchitecture; quantum",
}
@Article{Liang:2008:RVT,
author = "Xiaoyao Liang and Gu-Yeon Wei and David Brooks",
title = "{ReVIVaL}: a Variation-Tolerant Architecture Using
Voltage Interpolation and Variable Latency",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "191--202",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382138",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Process variations are poised to significantly degrade
performance benefits sought by moving to the next
nanoscale technology node. Parameter fluctuations in
devices can introduce large variations in peak
operation among chips, among cores on a single chip,
and among microarchitectural blocks within one core.
Hence, it will be difficult to only rely on traditional
frequency binning to efficiently cover the large
variations that are expected. Furthermore, multiple
voltage/frequency domains introduce significant
hardware overhead and alone cannot address the full
extent of delay variations expected in future
multi-core systems. In this paper, we present ReVIVaL,
which combines two fine-grained post-fabrication tuning
techniques---voltage interpolation (VI) and variable
latency (VL). We show that the frequency variation
between chips, between cores on one chip, and between
functional units within cores can be reduced to a very
small range. The effectiveness of these techniques are
further verified through experiments on test chips
fabricated in a 130nm CMOS process. Detailed
architectural simulations of multi-core processors
demonstrate significant performance and power
advantages are possible by combining variable latency
with voltage interpolation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessor; microarchitecture; process
variations",
}
@Article{Wilkerson:2008:TCC,
author = "Chris Wilkerson and Hongliang Gao and Alaa R.
Alameldeen and Zeshan Chishti and Muhammad Khellah and
Shih-Lien Lu",
title = "Trading off Cache Capacity for Reliability to Enable
Low Voltage Operation",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "203--214",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382139",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "One of the most effective techniques to reduce a
processor's power consumption is to reduce supply
voltage. However, reducing voltage in the context of
manufacturing-induced parameter variations can cause
many types of memory circuits to fail. As a result,
voltage scaling is limited by a minimum voltage, often
called Vccmin, beyond which circuits may not operate
reliably. Large memory structures (e.g., caches)
typically set Vccmin for the whole processor. In this
paper, we propose two architectural techniques that
enable microprocessor caches (L1 and L2), to operate at
low voltages despite very high memory cell failure
rates. The Word-disable scheme combines two consecutive
cache lines, to form a single cache line where only
non-failing words are used. The Bit-fix scheme uses a
quarter of the ways in a cache set to store positions
and fix bits for failing bits in other ways of the set.
During high voltage operation, both schemes allow use
of the entire cache. During low voltage operation, they
sacrifice cache capacity by 50\% and 25\%,
respectively, to reduce Vccmin below 500mV. Compared to
current designs with a Vccmin of 825mV, our schemes
enable a 40\% voltage reduction, which reduces power by
85\% and energy per instruction (EPI) by 53\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache; cache design; low power; low voltage;
reliability; SRAM; stability; Vccmin",
}
@Article{Roesner:2008:CDP,
author = "Franziska Roesner and Doug Burger and Stephen W.
Keckler",
title = "Counting Dependence Predictors",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "215--226",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382140",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern processors rely on memory dependence prediction
to execute load instructions as early as possible,
speculating that they are not dependent on an earlier,
unissued store. To date, the most sophisticated
dependence predictors, such as Store Sets, have been
tightly coupled to the fetch and execution streams,
requiring global knowledge of the in-flight stream of
stores to synchronize loads with specific stores. This
paper proposes a new dependence predictor design,
called a Counting Dependence Predictor (CDP). The key
feature of CDPs is that the prediction mechanism
predicts some set of events for which a particular
dynamic load should wait, which may include some number
of matching stores. By waiting for local events only,
this dependence predictor can work effectively in a
distributed microarchitecture where centralized fetch
and execution streams are infeasible or undesirable. We
describe and evaluate a distributed Counting Dependence
Predictor and protocol that achieves 92\% of the
performance of perfect memory disambiguation. It
outperforms a load-wait table, similar to the Alpha
21264, by 11\%. Idealized, centralized implementations
of Store Sets and the Exclusive Collision Predictor,
both of which would be difficult to implement in a
distributed microarchitecture, achieve 97\% and 94\% of
oracular performance, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dependence prediction; memory systems; multiprocessor
and multicore architectures",
}
@Article{Jerger:2008:VCT,
author = "Natalie Enright Jerger and Li-Shiuan Peh and Mikko
Lipasti",
title = "Virtual Circuit Tree Multicasting: a Case for On-Chip
Hardware Multicast Support",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "229--240",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382141",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Current state-of-the-art on-chip networks provide
efficiency, high throughput, and low latency for
one-to-one (unicast) traffic. The presence of
one-to-many (multicast) or one-to-all (broadcast)
traffic can significantly degrade the performance of
these designs, since they rely on multiple unicasts to
provide one-to-many communication. This results in a
burst of packets from a single source and is a very
inefficient way of performing multicast and broadcast
communication. This inefficiency is compounded by the
proliferation of architectures and coherence protocols
that require multicast and broadcast communication. In
this paper, we characterize a wide array of on-chip
communication scenarios that benefit from hardware
multicast support. We propose Virtual Circuit Tree
Multicasting (VCTM) and present a detailed multicast
router design that improves network performance by up
to 90\% while reducing network activity (hence power)
by up to 53\%. Our VCTM router is flexible enough to
improve interconnect performance for a broad spectrum
of multicasting scenarios, and achieves these benefits
with straightforward and inexpensive extensions to a
state-of-the-art packet-switched router.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache coherence protocol; interconnection network;
multiprocessor",
}
@Article{Kodi:2008:IIR,
author = "Avinash Karanth Kodi and Ashwini Sarathy and Ahmed
Louri",
title = "{iDEAL}: Inter-router Dual-Function Energy and
Area-Efficient Links for Network-on-Chip {(NoC)}
Architectures",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "241--250",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382142",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Network-on-Chip (NoC) architectures have been adopted
by a growing number of multi-core designs as a flexible
and scalable solution to the increasing wire delay
constraints in the deep sub-micron regime. However, the
shrinking feature size limits the performance of NoCs
due to power and area constraints. Research into the
optimization of NoCs has shown that a reduction in the
number of buffers in the NoC routers reduces the power
and area overhead but degrades the network performance.
In this paper, we propose iDEAL, a low-power
area-efficient NoC architecture by reducing the number
of buffers within the router. To overcome the
performance degradation caused by the reduced buffer
size, we propose to use adaptive dual-function links
capable of data transmission as well as data storage
when required. Simulation results for the proposed
architecture show that reducing the router buffer size
in half and using the adaptive dual-function links
achieves nearly 40\% savings in buffer power, 30\%
savings in overall network power and about 41\% savings
in the router area, with only a marginal 1-3\% drop in
performance. Moreover, the performance in iDEAL can be
further improved by aggressive and speculative flow
control techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "interconnects; low-power architecture;
network-on-chip",
}
@Article{Park:2008:MML,
author = "Dongkook Park and Soumya Eachempati and Reetuparna Das
and Asit K. Mishra and Yuan Xie and N. Vijaykrishnan
and Chita R. Das",
title = "{MIRA}: a Multi-layered On-Chip Interconnect Router
Architecture",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "251--261",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.13",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recently, Network-on-Chip (NoC) architectures have
gained popularity to address the interconnect delay
problem for designing CMP / multi-core / SoC systems in
deep sub-micron technology. However, almost all prior
studies have focused on 2D NoC designs. Since three
dimensional (3D) integration has emerged to mitigate
the interconnect delay problem, exploring the NoC
design space in 3D can provide ample opportunities to
design high performance and energy-efficient NoC
architectures. In this paper, we propose a 3D stacked
NoC router architecture, called MIRA, which unlike the
3D routers in previous works, is stacked into multiple
layers and optimized to reduce the overall area
requirements and power consumption. We discuss the
design details of a four-layer 3D NoC and its enhanced
version with additional express channels, and compare
them against a ($ 6 \times 6 $) 2D design and a baseline 3D
design. All the designs are evaluated using a
cycle-accurate 3D NoC simulator, and integrated with
the Orion power model for performance and power
analysis. The simulation results with synthetic and
application traces demonstrate that the proposed
multi-layered NoC routers can outperform the 2D and
na{\"\i}ve 3D designs in terms of performance and
power. It can achieve up to 42\% reduction in power
consumption and up to 51\% improvement in average
latency with synthetic workloads. With real workloads,
these benefits are around 67\% and 38\%,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "3D; express channel; express path; Network-on-Chip;
NoC; on-chip interconnect; router architecture",
}
@Article{Hower:2008:REE,
author = "Derek R. Hower and Mark D. Hill",
title = "{Rerun}: Exploiting Episodes for Lightweight Memory
Race Recording",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "265--276",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382144",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Multiprocessor deterministic replay has many potential
uses in the era of multicore computing, including
enhanced debugging, fault tolerance, and intrusion
detection. While sources of nondeterminism in a
uniprocessor can be recorded efficiently in software,
it seems likely that hardware support will be needed in
a multiprocessor environment where the outcome of
memory races must also be recorded. We develop a memory
race recording mechanism, called Rerun, that uses small
hardware state ($ \approx 166 $ bytes/core), writes a
small race log ($ \approx 4 $ bytes/kilo-instruction),
and operates well as the number of cores per system
scales (e.g., to 16 cores). Rerun exploits the dual of
conventional wisdom in race recording: Rather than
record information about individual memory accesses
that conflict, we record how long a thread executes
without conflicting with other threads. In particular,
Rerun passively creates atomic episodes. Each episode
is a dynamic instruction sequence that a thread happens
to execute without interacting with other threads.
Rerun uses Lamport Clocks to order episodes and enable
replay of an equivalent execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "determinism; multicore; race recording",
}
@Article{Lucia:2008:AAD,
author = "Brandon Lucia and Joseph Devietti and Karin Strauss
and Luis Ceze",
title = "{Atom-Aid}: Detecting and Surviving Atomicity
Violations",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "277--288",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382145",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Writing shared-memory parallel programs is
error-prone. Among the concurrency errors that
programmers often face are atomicity violations, which
are especially challenging. They happen when
programmers make incorrect assumptions about atomicity
and fail to enclose memory accesses that should occur
atomically inside the same critical section. If these
accesses happen to be interleaved with conflicting
accesses from different threads, the program might
behave incorrectly. Recent architectural proposals
arbitrarily group consecutive dynamic memory operations
into atomic blocks to enforce memory ordering at a
coarse grain. This provides what we call implicit
atomicity, as the atomic blocks are not derived from
explicit program annotations. In this paper, we make
the fundamental observation that implicit atomicity
probabilistically hides atomicity violations by
reducing the number of interleaving opportunities
between memory operations. We then propose Atom-Aid,
which creates implicit atomic blocks intelligently
instead of arbitrarily, dramatically reducing the
probability that atomicity violations will manifest
themselves. Atom-Aid is also able to report where
atomicity violations might exist in the code, providing
resilience and debuggability. We evaluate Atom-Aid
using buggy code from applications including Apache,
MySQL, and XMMS, showing that Atom-Aid virtually
eliminates the manifestation of atomicity violations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bug; multiprocessors; parallel programming; software
reliability",
}
@Article{Montesinos:2008:DRD,
author = "Pablo Montesinos and Luis Ceze and Josep Torrellas",
title = "{DeLorean}: Recording and Deterministically Replaying
Shared-Memory Multiprocessor Execution Efficiently",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "289--300",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.36",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Support for deterministic replay of multithreaded
execution can greatly help in finding concurrency bugs.
For highest effectiveness, replay schemes should (i)
record at production-run speed, (ii) keep their logging
requirements minute, and (iii) replay at a speed
similar to that of the initial execution. In this
paper, we propose a new substrate for deterministic
replay that provides substantial advances along these
axes. In our proposal, processors execute blocks of
instructions atomically, as in transactional memory or
speculative multithreading, and the system only needs
to record the commit order of these blocks. We call our
scheme DeLorean. Our results show that DeLorean records
execution at a speed similar to that of Release
Consistency (RC) execution and replays at about 82\% of
its speed. In contrast, most current schemes only
record at the speed of Sequential Consistency (SC)
execution. Moreover, DeLorean only needs 7.5\% of the
log size needed by a state-of-the-art scheme. Finally,
DeLorean can be configured to need only 0.6\% of the
log size of the state-of-the-art scheme at the cost of
recording at 86\% of RC's execution speed --- still
faster than SC. In this configuration, the log of an
8-processor 5-GHz machine is estimated to be only about
20GB per day.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sankar:2008:IDP,
author = "Sriram Sankar and Sudhanva Gurumurthi and Mircea R.
Stan",
title = "Intra-disk Parallelism: An Idea Whose Time Has Come",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "303--314",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382147",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Server storage systems use a large number of disks to
achieve high performance, thereby consuming a
significant amount of power. In this paper, we propose
to significantly reduce the power consumed by such
storage systems via intra-disk parallelism, wherein
disk drives can exploit parallelism in the I/O request
stream. Intra-disk parallelism can facilitate replacing
a large disk array with a smaller one, using the
minimum number of disk drives needed to satisfy the
capacity requirements. We show that the design space of
intra-disk parallelism is large and present a taxonomy
to formulate specific implementations within this
space. Using a set of commercial workloads, we perform
a limit study to identify the key performance
bottlenecks that arise when we replace a storage array
that is tuned to provide high performance with a single
high-capacity disk drive. We show that it is possible
to match, and even surpass, the performance of a
storage array for these workloads by using a single
disk drive of sufficient capacity that exploits
intra-disk parallelism, while significantly reducing
the power consumed by the storage system. We evaluate
the performance and power consumption of disk arrays
composed of intra-disk parallel drives, and discuss
engineering and cost issues related to the
implementation and deployment of such disk drives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "disk; I/O; parallelism; power; storage",
}
@Article{Lim:2008:UDN,
author = "Kevin Lim and Parthasarathy Ranganathan and Jichuan
Chang and Chandrakant Patel and Trevor Mudge and Steven
Reinhardt",
title = "Understanding and Designing New Server Architectures
for Emerging Warehouse-Computing Environments",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "315--326",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382148",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper seeks to understand and design
next-generation servers for emerging
`warehouse-computing' environments. We make two key
contributions. First, we put together a detailed
evaluation infrastructure including a new benchmark
suite for warehouse-computing workloads, and detailed
performance, cost, and power models, to quantitatively
characterize bottlenecks. Second, we study a new
solution that incorporates volume non-server-class
components in novel packaging solutions, with memory
sharing and flash-based disk caching. Our results show
that this approach has promise, with a 2X improvement
on average in performance-per-dollar for our benchmark
suite.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "evaluation; server architecture; warehouse-computing",
}
@Article{Kgil:2008:INF,
author = "Taeho Kgil and David Roberts and Trevor Mudge",
title = "Improving {NAND} Flash Based Disk Caches",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "327--338",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1109/ISCA.2008.32",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Flash is a widely used storage device that provides
high density and low power, appealing properties for
general purpose computing. Today, its usual application
is in portable special purpose devices such as MP3
players. In this paper we examine its use in the server
domain --- a more general purpose environment.
Aggressive process scaling and the use of multi-level
cells continues to improve density ahead of Moore's Law
predictions, making Flash even more attractive as a
general purpose memory solution. Unfortunately,
reliability limits the use of Flash. To seriously
consider Flash in the server domain, architectural
support must exist to address this concern. This paper
first shows how Flash can be used in today's server
platforms as a disk cache. It then proposes two
improvements. The first improves performance and
reliability by splitting Flash based disk caches into
separate read and write regions. The second improves
reliability by employing a programmable Flash memory
controller. It can change the error code strength
(number of correctable bits) and the number of bits
that a memory cell can store (cell density) according
to the demands of the application. Our studies show
that Flash reduces overall power consumed by the system
memory and hard disk drive up to 3 times while
maintaining performance. We also show that Flash
lifetime can be improved by a factor of 20 when using a
programmable Flash memory controller, if some
performance degradation (below 5\%) is acceptable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "data center; disk cache; Flash; Flash memory
controller; NAND Flash",
}
@Article{Li:2008:OEA,
author = "Xiaodong Li and Sarita V. Adve and Pradip Bose and
Jude A. Rivers",
title = "Online Estimation of Architectural Vulnerability
Factor for Soft Errors",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "3",
pages = "341--352",
month = jun,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1394608.1382150",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Aug 6 08:35:03 MDT 2008",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As CMOS technology scales and more transistors are
packed on to the same chip, soft error reliability has
become an increasingly important design issue for
processors. Prior research has shown that there is
significant architecture-level masking, and many soft
error solutions take advantage of this effect. Prior
work has also shown that the degree of such masking can
vary significantly across workloads and between
individual workload phases, motivating dynamic
adaptation of reliability solutions for optimal cost
and benefit. For such adaptation, it is important to be
able to accurately estimate the amount of masking or
the architecture vulnerability factor (AVF) online,
while the program is running. Unfortunately, existing
solutions for estimating AVF are often based on offline
simulators and hard to implement in real processors.
This paper proposes a novel way of estimating AVF
online, using simple modifications to the processor.
The estimation method applies to both logic and storage
structures on the processor. Compared to previous
methods for estimating AVF, our method does not require
any offline simulation or calibration for different
workloads. We tested our method with a widely used
simulator from industry, for four processor structures
and for 100 to 200 intervals of each of eleven SPEC
benchmarks. The results show that our method provides
acceptably accurate AVF estimates at runtime. The
absolute error rarely exceeds 0.08 across all
application intervals for all structures, and the mean
absolute error for a given application and structure
combination is always within 0.05.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "AVF estimation; processor reliability; soft error",
}
@Article{Shin:2008:PWR,
  author          = {Jeonghee Shin and Victor Zyuban and Pradip Bose and
                     Timothy M. Pinkston},
  title           = {A Proactive Wearout Recovery Approach for Exploiting
                     Microarchitectural Redundancy to Extend Cache {SRAM}
                     Lifetime},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {353--362},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382151},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Microarchitectural redundancy has been proposed as a
                     means of improving chip lifetime reliability. It is
                     typically used in a reactive way, allowing chips to
                     maintain operability in the presence of failures by
                     detecting and isolating, correcting, and/or replacing
                     components on a first-come, first-served basis only
                     after they become faulty. In this paper, we explore an
                     alternative, more preferred method of exploiting
                     microarchitectural redundancy to enhance chip lifetime
                     reliability. In our proposed approach, redundancy is
                     used proactively to allow non-faulty microarchitecture
                     components to be temporarily deactivated, on a rotating
                     basis, to suspend and/or recover from certain wearout
                     effects. This approach improves chip lifetime
                     reliability by warding off the onset of wearout
                     failures as opposed to reacting to them posteriorly.
                     Applied to on-chip cache SRAM for combating
                     NBTI-induced wearout failure, our proactive wearout
                     recovery approach increases lifetime reliability
                     (measured in mean-time-to-failure) of the cache by
                     about a factor of seven relative to no use of
                     microarchitectural redundancy and a factor of five
                     relative to conventional reactive use of redundancy
                     having similar area overhead.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {lifetime reliability; microarchitectural redundancy;
                     proactive approach; wearout recovery},
}
@Article{Teodorescu:2008:VAA,
  author          = {Radu Teodorescu and Josep Torrellas},
  title           = {Variation-Aware Application Scheduling and Power
                     Management for Chip Multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {363--374},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1109/ISCA.2008.40},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Within-die process variation causes individual cores
                     in a Chip Multiprocessor (CMP) to differ substantially
                     in both static power consumed and maximum frequency
                     supported. In this environment, ignoring variation
                     effects when scheduling applications or when managing
                     power with Dynamic Voltage and Frequency Scaling (DVFS)
                     is suboptimal. This paper proposes variation-aware
                     algorithms for application scheduling and power
                     management. One such power management algorithm, called
                     {\em LinOpt}, uses linear programming to find the best
                     voltage and frequency levels for each of the cores in
                     the CMP --- maximizing throughput at a given power
                     budget. In a 20-core CMP, the combination of
                     variation-aware application scheduling and {\em
                     LinOpt\/} increases the average throughput by 12--17\%
                     and reduces the average $ E D^2 $ by 30--38\% --- all
                     relative to using variation-aware scheduling together
                     with a simple extension to Intel's Foxton power
                     management algorithm.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {application scheduling; power management; process
                     variation},
}
@Article{Chen:2008:FHA,
  author          = {Shimin Chen and Michael Kozuch and Theodoros Strigkos
                     and Babak Falsafi and Phillip B. Gibbons and Todd C.
                     Mowry and Vijaya Ramachandran and Olatunji Ruwase and
                     Michael Ryan and Evangelos Vlachos},
  title           = {Flexible Hardware Acceleration for Instruction-Grain
                     Program Monitoring},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {377--388},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382153},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Instruction-grain program monitoring tools, which
                     check and analyze executing programs at the granularity
                     of individual instructions, are invaluable for quickly
                     detecting bugs and security attacks and then limiting
                     their damage (via containment and/or recovery).
                     Unfortunately, their fine-grain nature implies very
                     high monitoring overheads for software-only tools,
                     which are typically based on dynamic binary
                     instrumentation. Previous hardware proposals either
                     focus on mechanisms that target specific bugs or
                     address only the cost of binary instrumentation. In
                     this paper, we propose a flexible hardware solution for
                     accelerating a wide range of instruction-grain
                     monitoring tools. By examining a number of diverse
                     tools (for memory checking, security tracking, and data
                     race detection), we identify three significant common
                     sources of overheads and then propose three novel
                     hardware techniques for addressing these overheads:
                     Inheritance Tracking, Idempotent Filters, and
                     Metadata-TLBs. Together, these constitute a
                     general-purpose hardware acceleration framework.
                     Experimental results show our framework reduces
                     overheads by 2--3X over the previous state-of-the-art,
                     while supporting the needed flexibility.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {hardware acceleration; idempotent filter; inheritance
                     tracking; instruction-grain program monitoring; LBA;
                     lifeguards; log-based architectures; metadata-TLB},
}
@Article{Clark:2008:VVE,
  author          = {Nathan Clark and Amir Hormati and Scott Mahlke},
  title           = {{VEAL}: Virtualized Execution Accelerator for Loops},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {389--400},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1109/ISCA.2008.33},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Performance improvement solely through transistor
                     scaling is becoming more and more difficult, thus it is
                     increasingly common to see domain specific accelerators
                     used in conjunction with general purpose processors to
                     achieve future performance goals. There is a serious
                     drawback to accelerators, though: binary compatibility.
                     An application compiled to utilize an accelerator
                     cannot run on a processor without that accelerator, and
                     applications that do not utilize an accelerator will
                     never use it. To overcome this problem, we propose
                     decoupling the instruction set architecture from the
                     underlying accelerators. Computation to be accelerated
                     is expressed using a processor's baseline instruction
                     set, and light-weight dynamic translation maps the
                     representation to whatever accelerators are available
                     in the system. In this paper, we describe the changes
                     to a compilation framework and processor system needed
                     to support this abstraction for an important set of
                     accelerator designs that support innermost loops. In
                     this analysis, we investigate the dynamic overheads
                     associated with abstraction as well as the
                     static/dynamic tradeoffs to improve the dynamic mapping
                     of loop-nests. As part of the exploration, we also
                     provide a quantitative analysis of the hardware
                     characteristics of effective loop accelerators. We
                     conclude that using a hybrid static-dynamic compilation
                     approach to map computation on to loop-level
                     accelerators is a practical way to increase computation
                     efficiency, without the overheads associated with
                     instruction set modification.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chen:2008:SSP,
  author          = {Haibo Chen and Xi Wu and Liwei Yuan and Binyu Zang and
                     Pen-chung Yew and Frederic T. Chong},
  title           = {From Speculation to Security: Practical and Efficient
                     Information Flow Tracking Using Speculative Hardware},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {401--412},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382156},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Dynamic information flow tracking (also known as taint
                     tracking) is an appealing approach to combat various
                     security attacks. However, the performance of
                     applications can severely degrade without hardware
                     support for tracking taints. This paper observes that
                     information flow tracking can be efficiently emulated
                     using deferred exception tracking in microprocessors
                     supporting speculative execution. Based on this
                     observation, we propose SHIFT, a low-overhead,
                     software-based dynamic information flow tracking system
                     to detect a wide range of attacks. The key idea is to
                     treat tainted state (describing untrusted data) as
                     speculative state (describing deferred exceptions).
                     SHIFT leverages existing architectural support for
                     speculative execution to track tainted state in
                     registers and needs to instrument only load and store
                     instructions to track tainted state in memory using a
                     bitmap, which results in significant performance
                     advantages. Moreover, by decoupling mechanisms for
                     taint tracking from security policies, SHIFT can detect
                     a wide range of exploits, including high-level semantic
                     attacks. We have implemented SHIFT using the Itanium
                     processor, which has support for deferred exceptions,
                     and by modifying GCC to instrument loads and stores. A
                     security assessment shows that SHIFT can detect both
                     low-level memory corruption exploits as well as
                     high-level semantic attacks with no false positives.
                     Performance measurements show that SHIFT incurs about
                     1\% overhead for server applications. The performance
                     slowdown for SPEC-INT2000 is 2.81X and 2.27X for
                     tracking at byte-level and word-level respectively.
                     Minor architectural improvements to the Itanium
                     processor (adding three simple instructions) can reduce
                     the performance slowdown down to 2.32X and 1.8X for
                     byte-level and word-level tracking, respectively.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {deferred exception; dynamic information flow tracking;
                     speculative execution; taint tracking},
}
@Article{Boneti:2008:SCP,
  author          = {Carlos Boneti and Francisco J. Cazorla and Roberto
                     Gioiosa and Alper Buyuktosunoglu and Chen-Yong Cher and
                     Mateo Valero},
  title           = {Software-Controlled Priority Characterization of
                     {POWER5} Processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {415--426},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1109/ISCA.2008.8},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Due to the limitations of instruction-level
                     parallelism, thread-level parallelism has become a
                     popular way to improve processor performance. One
                     example is the IBM POWER5\TM{} processor, a two-context
                     simultaneous-multithreaded dual-core chip. In each SMT
                     core, the IBM POWER5 features two levels of thread
                     resource balancing and prioritization. The first level
                     provides automatic in-hardware resource balancing,
                     while the second level is a software-controlled
                     priority mechanism that presents eight levels of thread
                     priorities. Currently, software-controlled
                     prioritization is only used in limited number of cases
                     in the software platforms due to lack of performance
                     characterization of the effects of this mechanism. In
                     this work, we characterize the effects of the
                     software-based prioritization on several different
                     workloads. We show that the impact of the
                     prioritization significantly depends on the workloads
                     coscheduled on a core. By prioritizing the right task,
                     it is possible to obtain more than two times of
                     throughput improvement for synthetic workloads compared
                     to the baseline. We also present two application case
                     studies targeting two different performance metrics:
                     the first case study improves overall throughput by
                     23.7\% and the second case study reduces the total
                     execution time by 9.3\%. In addition, we show the
                     circumstances when a background thread can be run
                     transparently without affecting the performance of the
                     foreground thread.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {IBM POWER5; performance characterization; simultaneous
                     multithreading; SMT; software-controlled
                     prioritization},
}
@Article{Shye:2008:LLR,
  author          = {Alex Shye and Berkin Ozisikyilmaz and Arindam Mallik
                     and Gokhan Memik and Peter A. Dinda and Robert P. Dick
                     and Alok N. Choudhary},
  title           = {Learning and Leveraging the Relationship between
                     Architecture-Level Measurements and Individual User
                     Satisfaction},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {427--438},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382158},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The ultimate goal of computer design is to satisfy the
                     end-user. In particular computing domains, such as
                     interactive applications, there exists a variation in
                     user expectations and user satisfaction relative to the
                     performance of existing computer systems. In this work,
                     we leverage this variation to develop more efficient
                     architectures that are customized to end-users. We
                     first investigate the relationship between
                     microarchitectural parameters and user satisfaction.
                     Specifically, we analyze the relationship between
                     hardware performance counter (HPC) readings and
                     individual satisfaction levels reported by users for
                     representative applications. Our results show that the
                     satisfaction of the user is strongly correlated to the
                     performance of the underlying hardware. More
                     importantly, the results show that user satisfaction is
                     highly user-dependent. To take advantage of these
                     observations, we develop a framework called
                     Individualized Dynamic Voltage and Frequency Scaling
                     (iDVFS). We study a group of users to characterize the
                     relationship between the HPCs and individual user
                     satisfaction levels. Based on this analysis, we use
                     artificial neural networks to model the function from
                     HPCs to user satisfaction for individual users. This
                     model is then used online to predict user satisfaction
                     and set the frequency level accordingly. A second set
                     of user studies demonstrates that iDVFS reduces the CPU
                     power consumption by over 25\% in representative
                     applications as compared to the Windows XP DVFS
                     algorithm.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {dynamic power management; hardware performance
                     counters; learning user satisfaction; user-aware
                     architectures},
}
@Article{Kumar:2008:AVO,
  author          = {Sanjeev Kumar and Daehyun Kim and Mikhail Smelyanskiy
                     and Yen-Kuang Chen and Jatin Chhugani and Christopher
                     J. Hughes and Changkyu Kim and Victor W. Lee and
                     Anthony D. Nguyen},
  title           = {Atomic Vector Operations on Chip Multiprocessors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {441--452},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382154},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The current trend is for processors to deliver
                     dramatic improvements in parallel performance while
                     only modestly improving serial performance. Parallel
                     performance is harvested through vector/SIMD
                     instructions as well as multithreading (through both
                     multithreaded cores and chip multiprocessors). Vector
                     parallelism can be more efficiently supported than
                     multithreading, but is often harder for software to
                     exploit. In particular, code with sparse data access
                     patterns cannot easily utilize the vector/SIMD
                     instructions of mainstream processors. Hardware to
                     scatter and gather sparse data has previously been
                     proposed to enable vector execution for these codes.
                     However, on multithreaded architectures, a number of
                     applications spend significant time on atomic
                     operations (e.g., parallel reductions), which cannot be
                     vectorized using previously proposed schemes. This
                     paper proposes architectural support for atomic vector
                     operations (referred to as GLSC) that addresses this
                     limitation. GLSC extends scatter-gather hardware to
                     support atomic memory operations. Our experiments show
                     that the GLSC provides an average performance
                     improvement on a set of important RMS kernels of 54\%
                     for 4-wide SIMD.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {locks; multiprocessors; reductions; SIMD; vector},
}
@Article{Loh:2008:SMA,
  author          = {Gabriel H. Loh},
  title           = {{$3$D}-Stacked Memory Architectures for Multi-core
                     Processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {453--464},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382159},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Three-dimensional integration enables stacking memory
                     directly on top of a microprocessor, thereby
                     significantly reducing wire delay between the two.
                     Previous studies have examined the performance benefits
                     of such an approach, but all of these works only
                     consider commodity 2D DRAM organizations. In this work,
                     we explore more aggressive 3D DRAM organizations that
                     make better use of the additional die-to-die bandwidth
                     provided by 3D stacking, as well as the additional
                     transistor count. Our simulation results show that with
                     a few simple changes to the 3D-DRAM organization, we
                     can achieve a 1.75x speedup over previously proposed
                     3D-DRAM approaches on our memory-intensive
                     multi-programmed workloads on a quad-core processor.
                     The significant increase in memory system performance
                     makes the L2 miss handling architecture (MHA) a new
                     bottleneck, which we address by combining a novel data
                     structure called the Vector Bloom Filter with dynamic
                     MSHR capacity tuning. Our scalable L2 MHA yields an
                     additional 17.8\% performance improvement over our
                     3D-stacked memory architecture.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {3D integration; memory; multi-core},
}
@Article{Anonymous:2008:AI,
  author          = {Anonymous},
  title           = {Author Index},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {465--466},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382160},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Anonymous:2008:PI,
  author          = {Anonymous},
  title           = {{Publisher}'s Information},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {468--468},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382161},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Anonymous:2008:CA,
  author          = {Anonymous},
  title           = {Cover Art},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {3},
  pages           = {C1--C1},
  month           = jun,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1394608.1382162},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Aug 6 08:35:03 MDT 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Karne:2008:OSC,
  author          = {Ramesh K. Karne and Alexander L. Wijesinha and George
                     H. {Ford, Jr.}},
  title           = {Opinion: stay on course with an evolution or choose a
                     revolution in computing},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {4},
  pages           = {1--6},
  month           = sep,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1462609.1462611},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Dec 8 14:01:02 MST 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2008:INa,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {4},
  pages           = {7--11},
  month           = sep,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1462609.1462613},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Dec 8 14:01:02 MST 2008},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bengtsson:2008:DSA,
  author          = {Jerker Bengtsson and Bertil Svensson},
  title           = {A domain-specific approach for software development on
                     {Manycore} platforms},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {2--10},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556446},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The programming complexity of increasingly parallel
                     processors calls for new tools that assist programmers
                     in utilising the parallel hardware resources. In this
                     paper we present a set of models that we have developed
                     as part of a tool for mapping dataflow graphs onto
                     manycores. One of the models captures the essentials of
                     manycores identified as suitable for signal processing,
                     and which we use as target for our algorithms. As an
                     intermediate representation we introduce timed
                     configuration graphs, which describe the mapping of a
                     model of an application onto a machine model. Moreover,
                     we show how a timed configuration graph by very simple
                     means can be evaluated using an abstract interpretation
                     to obtain performance feedback. This information can be
                     used by our tool and by the programmer in order to
                     discover improved mappings.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cederman:2008:SLB,
  author          = {Daniel Cederman and Philippas Tsigas},
  title           = {On sorting and load balancing on {GPUs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {11--18},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556447},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In this paper we take a look at GPU-Quicksort, an
                     efficient Quicksort algorithm suitable for the highly
                     parallel multi-core graphics processors. Quicksort had
                     previously been considered an inefficient sorting
                     solution for graphics processors, but GPU-Quicksort
                     often performs better than the fastest known sorting
                     implementations for graphics processors, such as radix
                     and bitonic sort. Quicksort can thus be seen as a
                     viable alternative for sorting large quantities of data
                     on graphics processors.\par
                     We also take a look at a comparison of different load
                     balancing schemes. To get maximum performance on the
                     many-core graphics processors it is important to have
                     an even balance of the workload so that all processing
                     units contribute equally to the task at hand. This can
                     be hard to achieve when the cost of a task is not known
                     beforehand and when new sub-tasks are created
                     dynamically during execution. With the recent advent of
                     scatter operations and atomic hardware primitives it is
                     now possible to bring some of the more elaborate
                     dynamic load balancing schemes from the conventional
                     SMP systems domain to the graphics processor domain.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ha:2008:NBP,
  author          = {Phuong Hoai Ha and Philippas Tsigas and Otto J.
                     Anshus},
  title           = {Non-blocking programming on multi-core graphics
                     processors: (extended abstract)},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {19--28},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556448},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper investigates the synchronization power of
                     coalesced memory accesses, a family of memory access
                     mechanisms introduced in recent large multicore
                     architectures like the CUDA graphics processors. We
                     first design three memory access models to capture the
                     fundamental features of the new memory access
                     mechanisms. Subsequently, we prove the exact
                     synchronization power of these models in terms of their
                     consensus numbers. These tight results show that the
                     coalesced memory access mechanisms can facilitate
                     strong synchronization between the threads of multicore
                     processors, without the need of synchronization
                     primitives other than reads and writes.\par
                     Moreover, based on the intrinsic features of recent GPU
                     architectures, we construct strong synchronization
                     objects like wait-free and t-resilient
                     read-modify-write objects for a general model of recent
                     GPU architectures without strong hardware
                     synchronization primitives like test-and-set and
                     compare-and-swap. Accesses to the wait-free objects
                     have time complexity $ O(N) $, where $N$ is the number
                     of processes. Our result demonstrates that it is
                     possible to construct wait-free synchronization
                     mechanisms for GPUs without the need of strong
                     synchronization primitives in hardware and that
                     wait-free programming is possible for GPUs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Bhattacharyya:2008:ODT,
  author          = {Shuvra S. Bhattacharyya and Gordon Brebner and
                     J{\"o}rn W. Janneck and Johan Eker and Carl von Platen
                     and Marco Mattavelli and Micka{\"e}l Raulet},
  title           = {{OpenDF}: a dataflow toolset for reconfigurable
                     hardware and multicore systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {29--35},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556449},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper presents the OpenDF framework and recalls
                     that dataflow programming was once invented to address
                     the problem of parallel computing. We discuss the
                     problems with an imperative style, von Neumann
                     programs, and present what we believe are the
                     advantages of using a dataflow programming model. The
                     CAL actor language is briefly presented and its role in
                     the ISO/MPEG standard is discussed. The Dataflow
                     Interchange Format (DIF) and related tools can be used
                     for analysis of actors and networks, demonstrating the
                     advantages of a dataflow approach. Finally, an overview
                     of a case study implementing an MPEG-4 decoder is
                     given.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Kessler:2008:OCP,
  author          = {Christoph W. Kessler and J{\"o}rg Keller},
  title           = {Optimized on-chip pipelining of memory-intensive
                     computations on the {Cell BE}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {36--45},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556450},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Multiprocessors-on-chip, such as the Cell BE
                     processor, regularly suffer from restricted bandwidth
                     to off-chip main memory. We propose to reduce memory
                     bandwidth requirements, and thus increase performance,
                     by expressing our application as a task graph, by
                     running dependent tasks concurrently and by pipelining
                     results directly from task to task where possible,
                     instead of buffering in off-chip memory. To maximize
                     bandwidth savings and balance load simultaneously, we
                     solve a mapping problem of tasks to SPEs on the Cell
                     BE. We present three approaches: an integer linear
                     programming formulation that allows to compute
                     Pareto-optimal mappings for smaller task graphs, general
                     heuristics, and a problem specific approximation
                     algorithm. We validate the mappings for data-parallel
                     computations and sorting.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Lundvall:2008:APS,
  author          = {H{\aa}kan Lundvall and Kristian Stav{\aa}ker and Peter
                     Fritzson and Christoph Kessler},
  title           = {Automatic parallelization of simulation code for
                     equation-based models with software pipelining and
                     measurements on three platforms},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {36},
  number          = {5},
  pages           = {46--55},
  month           = dec,
  year            = {2008},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1556444.1556451},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Jun 26 11:50:56 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In this work we report results from a new integrated
                     method of automatically generating parallel code from
                     Modelica models by combining parallelization at two
                     levels of abstraction. Performing inline expansion of a
                     Runge--Kutta solver combined with fine-grained
                     automatic parallelization of the right-hand side of the
                     resulting equation system opens up new possibilities
                     for generating high performance code, which is becoming
                     increasingly relevant when multi-core computers are
                     becoming commonplace. An implementation, in the form of
                     a backend module for the OpenModelica compiler, has
                     been developed and used for measurements on two
                     architectures: Intel Xeon and SGI Altix 3700 Bx2. This
                     paper also contains some very recent results of a
                     prototype implementation of this parallelization
                     approach on the Cell BE processor architecture.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Fang:2008:SDA,
author = "Huan Fang and Mats Brorsson",
title = "Scalable directory architecture for distributed shared
memory chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "56--64",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556452",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Traditional Directory-based cache coherence protocol
is far from optimal for large-scale cache coherent
shared memory multiprocessors due to the increasing
latency to access directories stored in DRAM memory.
Instead of keeping directories in main memory, we
consider distributing the directory together with L2
cache across all nodes on a Chip Multiprocessor. Each
node contains a processing unit, a private L1 cache, a
slice of L2 cache, memory controller and a router. Both
L2 cache and memories are distributed shared and
interleaved by a subset of memory address bits. All
nodes are interconnected through a low latency two
dimensional Mesh network. Directory, being a split
component to L2 cache, only stores sharing information
for blocks while L2 cache stores only data blocks
exclusive with L1 cache. Shared L2 cache can increase
total effective cache capacity on chip, but also
increase the miss latency when data is on a remote
node. Being different from Directory Cache structure,
our proposal totally removes the directory from memory,
which saves memory space and reduces access latency.
Compared to L2 cache that combines directory
information internally, our L2 cache structure saves up
to 88\% cache space and achieves similar performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jonsson:2008:SSE,
author = "Bengt Jonsson",
title = "State-space exploration for concurrent algorithms
under weak memory orderings: (preliminary version)",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "65--71",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556453",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Several concurrent implementations of familiar data
abstractions such as queues, sets, or maps typically do
not follow locking disciplines, and often use lock-free
synchronization to gain performance. Since such
algorithms are exposed to a weak memory model, they are
notoriously hard to get correct, as witnessed by many
bugs found in published algorithms. We outline a
technique for analyzing correctness of concurrent
algorithms under weak memory models, in which a model
checker is used to search for correctness violations.
The algorithm to be analyzed is transformed into a form
where statements may be reordered according to a
particular weak memory ordering. The transformed
algorithm can then be analyzed by a model-checking
tool, e.g., by enumerative state exploration. We
illustrate the approach on a small example of a queue,
which allows an enqueue operation to be concurrent with
a dequeue operation, which we analyze with respect to
the RMO memory model defined in SPARC v9.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Abdulla:2008:MCR,
author = "Parosh Aziz Abdulla and Fr{\'e}d{\'e}ric Haziza and
Mats Kindahl",
title = "Model checking race-freeness",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "72--79",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556454",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the introduction of highly concurrent systems in
standard desktop computers, ensuring correctness of
industrial-size concurrent programs is becoming
increasingly important. One of the most important
standards in use for developing multi-threaded programs
is the POSIX Threads standard, commonly known as
PThreads. Of particular importance, the analysis of
industrial code should, as far as possible, be
automatic and not require annotations or other forms of
specifications of the code.\par
Model checking has been one of the most successful
approaches to program verification during the last two
decades. The size and complexity of applications which
can be handled have increased rapidly through
integration with symbolic techniques. These methods are
designed to work on finite (but large) state spaces.
This framework fails to deal with several essential
aspects of behaviours for multithreaded programs: there
is no bound a priori on the number of threads which may
arise in a given run of the system; each thread
manipulates local variables which often range over
unbounded domains; and the system has a dynamic
structure in the sense that threads can be created and
killed throughout execution of the system. In this
paper we concentrate on checking a particular class of
properties for concurrent programs, namely safety
properties. In particular, we focus on race-freeness,
that is, the absence of race conditions (also known as
data races) in shared-variable pthreaded
programs.\par
We will follow a particular methodology which we have
earlier developed for model checking general classes of
infinite-state systems [1, 3, 6, 8, 9] and apply a
symbolic backward reachability analysis to verify the
safety property. Since we construct a model as an
over-approximation of the original program, proving the
safety property in the model implies that the property
also holds in the original system. Surprisingly, it
leads to a quite efficient analysis which can be
carried out fully automatically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sundell:2008:NNB,
author = "H{\aa}kan Sundell and Philippas Tsigas",
title = "{NOBLE}: non-blocking programming support via
lock-free shared abstract data types",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "80--87",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556455",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "An essential part of programming for multi-core and
multi-processor includes efficient and reliable means
for sharing data. Lock-free data structures are known
as very suitable for this purpose, although experienced
to be very complex to design. In this paper, we present
a software library of non-blocking abstract data types
that have been designed to facilitate lock-free
programming for non-experts. The system provides: (i)
efficient implementations of the most commonly used
data types in concurrent and sequential software
design, (ii) a lock-free memory management system, and
(iii) a run-time system. The library provides clear
semantics that are at least as strong as those of
corresponding lock-based implementations of the
respective data types. Our software library can be used
for facilitating lock-free programming; its design
enables the programmer to: (i) replace lock-based
components of sequential or parallel code easily and
efficiently, (ii) use well-tuned concurrent algorithms
inside a software or hardware transactional system. In
the paper we describe the design and functionality of
the system. We also provide experimental results that
show that the library can considerably improve the
performance of software systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gidenstam:2008:LLF,
author = "Anders Gidenstam and Marina Papatriantafilou",
title = "{LFTHREADS}: a lock-free thread library",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "88--92",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556456",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This extended abstract presents LFTHREADS, a thread
library entirely based on lock-free methods, i.e. no
spinlocks or similar synchronization mechanisms are
employed in the implementation of the multithreading.
Since lock-freedom is highly desirable in
multiprocessors/multicores due to its advantages in
parallelism, fault-tolerance, convoy-avoidance and
more, there is an increased demand in lock-free methods
in parallel applications, hence also in
multiprocessor/multicore system services. LFTHREADS is
the first thread library that provides a lock-free
implementation of blocking synchronization primitives
for application threads; although the latter may sound
like a contradicting goal, such objects have several
benefits: e.g. library operations that block and
unblock threads on the same synchronization object can
make progress in parallel while maintaining the desired
thread-level semantics and without having to wait for
any ``slow'' operations among them. Besides, as no
spin-locks or similar synchronization mechanisms are
employed, memory contention can be reduced and
processors/cores are able to do useful work. As a
consequence, applications, too, can enjoy enhanced
parallelism and fault-tolerance. For the
synchronization in LFTHREADS we have introduced a new
method, which we call responsibility hand-off (RHO),
that does not need any special kernel support. The RHO
method is also of independent interest, as it can also
serve as a tool for lock-free token passing, management
of contention and interaction between scheduling and
synchronization. This paper gives an outline and the
context of LFTHREADS. For more details the reader is
referred to [7] and [8].",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Faxen:2008:WWS,
author = "Karl-Filip Fax{\'e}n",
title = "{Wool} --- a work stealing library",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "93--100",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556457",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents some preliminary results on a
small light weight user level task management library
called Wool. The Wool task scheduler is based on work
stealing. The objective of the library is to provide a
reasonably convenient programming interface (in
particular by not forcing the programmer to write in
continuation passing style) in ordinary C while still
having a very low task creation overhead. Several task
scheduling systems based on work stealing exist, but
they are typically either programming languages like
Cilk-5 or based on C++ like the Intel TBB or C\# as in
the Microsoft TPL. Our main conclusions are that such a
direct style interface is indeed possible and yields
performance that is comparable to that of the Intel
TBB.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2008:INb,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "101--111",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556459",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gebhart:2009:ETC,
author = "Mark Gebhart and Bertrand A. Maher and Katherine E.
Coons and Jeff Diamond and Paul Gratz and Mario Marino
and Nitya Ranganathan and Behnam Robatmili and Aaron
Smith and James Burrill and Stephen W. Keckler and Doug
Burger and Kathryn S. McKinley",
title = "An evaluation of the {TRIPS} computer system",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "1--12",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508246",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The TRIPS system employs a new instruction set
architecture (ISA) called Explicit Data Graph Execution
(EDGE) that renegotiates the boundary between hardware
and software to expose and exploit concurrency. EDGE
ISAs use a block-atomic execution model in which blocks
are composed of dataflow instructions. The goal of the
TRIPS design is to mine concurrency for high
performance while tolerating emerging technology
scaling challenges, such as increasing wire delays and
power consumption. This paper evaluates how well TRIPS
meets this goal through a detailed ISA and performance
analysis. We compare performance, using cycles counts,
to commercial processors. On SPEC CPU2000, the Intel
Core 2 outperforms compiled TRIPS code in most cases,
although TRIPS matches a Pentium 4. On simple
benchmarks, compiled TRIPS code outperforms the Core 2
by 10\% and hand-optimized TRIPS code outperforms it by
factor of 3. Compared to conventional ISAs, the
block-atomic model provides a larger instruction
window, increases concurrency at a cost of more
instructions executed, and replaces register and memory
accesses with more efficient direct
instruction-to-instruction communication. Our analysis
suggests ISA, microarchitecture, and compiler
enhancements for addressing weaknesses in TRIPS and
indicates that EDGE architectures have the potential to
exploit greater concurrency in future technologies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Pistol:2009:AIN,
author = "Constantin Pistol and Wutichai Chongchitmate and
Christopher Dwyer and Alvin R. Lebeck",
title = "Architectural implications of nanoscale integrated
sensing and computing",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "13--24",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508247",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper explores the architectural implications of
integrating computation and molecular probes to form
nanoscale sensor processors (nSP). We show how nSPs may
enable new computing domains and automate tasks that
currently require expert scientific training and costly
equipment. This new application domain severely
constrains nSP size, which significantly impacts the
architectural design space. In this context, we explore
nSP architectures and present an nSP design that
includes a simple accumulator-based ISA, sensors,
limited memory and communication transceivers. To
reduce the application memory footprint, we introduce
the concept of instruction-fused sensing. We use
simulation and analytical models to evaluate nSP
designs executing a representative set of target
applications. Furthermore, we propose a candidate nSP
technology based on optical Resonance Energy Transfer
(RET) logic that enables the small size required by the
application domain; our smallest design is about the
size of the largest known virus. We also show
laboratory results that demonstrate initial steps
towards a prototype.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Park:2009:CEA,
author = "Soyeon Park and Shan Lu and Yuanyuan Zhou",
title = "{CTrigger}: exposing atomicity violation bugs from
their hiding places",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "25--36",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508249",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Multicore hardware is making concurrent programs
pervasive. Unfortunately, concurrent programs are prone
to bugs. Among different types of concurrency bugs,
atomicity violation bugs are common and important.
Existing techniques to detect atomicity violation bugs
suffer from one limitation: requiring bugs to manifest
during monitored runs, which is an open problem in
concurrent program testing. This paper makes two
contributions. First, it studies the interleaving
characteristics of the common practice in concurrent
program testing (i.e., running a program over and over)
to understand why atomicity violation bugs are hard to
expose. Second, it proposes CTrigger to effectively and
efficiently expose atomicity violation bugs in large
programs. CTrigger focuses on a special type of
interleavings (i.e., unserializable interleavings) that
are inherently correlated to atomicity violation bugs,
and uses trace analysis to systematically identify
(likely) feasible unserializable interleavings with low
occurrence-probability. CTrigger then uses minimum
execution perturbation to exercise low-probability
interleavings and expose difficult-to-catch atomicity
violation. We evaluate CTrigger with real-world
atomicity violation bugs from four server/desktop
applications (Apache, MySQL, Mozilla, and PBZIP2) and
three SPLASH2 applications on 8-core machines. CTrigger
efficiently exposes the tested bugs within 1--235
seconds, two to four orders of magnitude faster than
stress testing. Without CTrigger, some of these bugs do
not manifest even after 7 full days of stress testing.
In addition, without deterministic replay support, once
a bug is exposed, CTrigger can help programmers
reliably reproduce it for diagnosis. Our tested bugs
are reproduced by CTrigger mostly within 5 seconds, 300
to over 60000 times faster than stress testing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Sidiroglou:2009:AAS,
author = "Stelios Sidiroglou and Oren Laadan and Carlos Perez
and Nicolas Viennot and Jason Nieh and Angelos D.
Keromytis",
title = "{ASSURE}: automatic software self-healing using rescue
points",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "37--48",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508250",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software failures in server applications are a
significant problem for preserving system availability.
We present ASSURE, a system that introduces rescue
points that recover software from unknown faults while
maintaining both system integrity and availability, by
mimicking system behavior under known error conditions.
Rescue points are locations in existing application
code for handling a given set of programmer-anticipated
failures, which are automatically repurposed and tested
for safely enabling fault recovery from a larger class
of (unanticipated) faults. When a fault occurs at an
arbitrary location in the program, ASSURE restores
execution to an appropriate rescue point and induces
the program to recover execution by virtualizing the
program's existing error-handling facilities. Rescue
points are identified using fuzzing, implemented using
a fast coordinated checkpoint-restart mechanism that
handles multi-process and multi-threaded applications,
and, after testing, are injected into production code
using binary patching. We have implemented an ASSURE
Linux prototype that operates without application
source code and without base operating system kernel
changes. Our experimental results on a set of
real-world server applications and bugs show that
ASSURE enabled recovery for all of the bugs tested with
fast recovery times, has modest performance overhead,
and provides automatic self-healing orders of magnitude
faster than current human-driven patch deployment
methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Lenharth:2009:RDO,
author = "Andrew Lenharth and Vikram S. Adve and Samuel T.
King",
title = "Recovery domains: an organizing principle for
recoverable operating systems",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "49--60",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508251",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We describe a strategy for enabling existing commodity
operating systems to recover from unexpected run-time
errors in nearly any part of the kernel, including core
kernel components. Our approach is dynamic and
request-oriented; it isolates the effects of a fault to
the requests that caused the fault rather than to
static kernel components. This approach is based on a
notion of ``recovery domains,'' an organizing principle
to enable rollback of state affected by a request in a
multithreaded system with minimal impact on other
requests or threads. We have applied this approach on
v2.4.22 and v2.6.27 of the Linux kernel and it required
132 lines of changed or new code: the other changes are
all performed by a simple instrumentation pass of a
compiler. Our experiments show that the approach is
able to recover from otherwise fatal faults with
minimal collateral impact during a recovery event.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Dimitrov:2009:ABB,
author = "Martin Dimitrov and Huiyang Zhou",
title = "Anomaly-based bug prediction, isolation, and
validation: an automated approach for software
debugging",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "61--72",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508252",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software defects, commonly known as bugs, present a
serious challenge for system reliability and
dependability. Once a program failure is observed, the
debugging activities to locate the defects are
typically nontrivial and time consuming. In this paper,
we propose a novel automated approach to pin-point the
root-causes of software failures. Our proposed approach
consists of three steps. The first step is bug
prediction, which leverages the existing work on
anomaly-based bug detection as exceptional behavior
during program execution has been shown to frequently
point to the root cause of a software failure. The
second step is bug isolation, which eliminates
false-positive bug predictions by checking whether the
dynamic forward slices of bug predictions lead to the
observed program failure. The last step is bug
validation, in which the isolated anomalies are
validated by dynamically nullifying their effects and
observing if the program still fails. The whole bug
prediction, isolation and validation process is fully
automated and can be implemented with efficient
architectural support. Our experiments with 6 programs
and 7 bugs, including a real bug in the gcc 2.95.2
compiler, show that our approach is highly effective at
isolating only the relevant anomalies. Compared to
state-of-art debugging techniques, our proposed
approach pinpoints the defect locations more accurately
and presents the user with a much smaller code set to
analyze.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Montesinos:2009:CSH,
author = "Pablo Montesinos and Matthew Hicks and Samuel T. King
and Josep Torrellas",
title = "{Capo}: a software-hardware interface for practical
deterministic multiprocessor replay",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "73--84",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508254",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "While deterministic replay of parallel programs is a
powerful technique, current proposals have
shortcomings. Specifically, software-based replay
systems have high overheads on multiprocessors, while
hardware-based proposals focus only on basic
hardware-level mechanisms, ignoring the overall replay
system. To be practical, hardware-based replay systems
need to support an environment with multiple parallel
jobs running concurrently --- some being recorded,
others being replayed and even others running without
recording or replay. Moreover, they need to manage
limited-size log buffers. This paper addresses these
shortcomings by introducing, for the first time, a set
of abstractions and a software-hardware interface for
practical hardware-assisted replay of multiprocessor
systems. The approach, called Capo, introduces the
novel abstraction of the Replay Sphere to separate the
responsibilities of the hardware and software
components of the replay system. In this paper, we also
design and build CapoOne, a prototype of a
deterministic multiprocessor replay system that
implements Capo using Linux and simulated DeLorean
hardware. Our evaluation of 4-processor executions
shows that CapoOne largely records with the efficiency
of hardware-based schemes and the flexibility of
software-based schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Devietti:2009:DDS,
author = "Joseph Devietti and Brandon Lucia and Luis Ceze and
Mark Oskin",
title = "{DMP}: deterministic shared memory multiprocessing",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "85--96",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508255",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Current shared memory multicore and multiprocessor
systems are nondeterministic. Each time these systems
execute a multithreaded application, even if supplied
with the same input, they can produce a different
output. This frustrates debugging and limits the
ability to properly test multithreaded code, becoming a
major stumbling block to the much-needed widespread
adoption of parallel programming. In this paper we make
the case for fully deterministic shared memory
multiprocessing (DMP). The behavior of an arbitrary
multithreaded program on a DMP system is only a
function of its inputs. The core idea is to make
inter-thread communication fully deterministic.
Previous approaches to coping with nondeterminism in
multithreaded programs have focused on replay, a
technique useful only for debugging. In contrast, while
DMP systems are directly useful for debugging by
offering repeatability by default, we argue that
parallel programs should execute deterministically in
the field as well. This has the potential to make
testing more assuring and increase the reliability of
deployed multithreaded software. We propose a range of
approaches to enforcing determinism and discuss their
implementation trade-offs. We show that determinism can
be provided with little performance cost using our
architecture proposals on future hardware, and that
software-only approaches can be utilized on existing
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Olszewski:2009:KED,
author = "Marek Olszewski and Jason Ansel and Saman
Amarasinghe",
title = "{Kendo}: efficient deterministic multithreading in
software",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "97--108",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508256",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Although chip-multiprocessors have become the industry
standard, developing parallel applications that target
them remains a daunting task. Non-determinism, inherent
in threaded applications, causes significant challenges
for parallel programmers by hindering their ability to
create parallel applications with repeatable results.
As a consequence, parallel applications are
significantly harder to debug, test, and maintain than
sequential programs. This paper introduces Kendo: a new
software-only system that provides deterministic
multithreading of parallel applications. Kendo enforces
a deterministic interleaving of lock acquisitions and
specially declared non-protected reads through a novel
dynamically load-balanced deterministic scheduling
algorithm. The algorithm tracks the progress of each
thread using performance counters to construct a
deterministic logical time that is used to compute an
interleaving of shared data accesses that is both
deterministic and provides good load balancing. Kendo
can run on today's commodity hardware while incurring
only a modest performance cost. Experimental results on
the SPLASH-2 applications yield a geometric mean
overhead of only 16\% when running on 4 processors.
This low overhead makes it possible to benefit from
Kendo even after an application is deployed.
Programmers can start using Kendo today to program
parallel applications that are easier to develop,
debug, and test.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Tiwari:2009:CIF,
author = "Mohit Tiwari and Hassan M. G. Wassel and Bita Mazloom
and Shashidhar Mysore and Frederic T. Chong and Timothy
Sherwood",
title = "Complete information flow tracking from the gates up",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "109--120",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508258",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "For many mission-critical tasks, tight guarantees on
the flow of information are desirable, for example,
when handling important cryptographic keys or sensitive
financial data. We present a novel architecture capable
of tracking all information flow within the machine,
including all explicit data transfers and all implicit
flows (those subtly devious flows caused by not
performing conditional operations). While the problem
is impossible to solve in the general case, we have
created a machine that avoids the general-purpose
programmability that leads to this impossibility
result, yet is still programmable enough to handle a
variety of critical operations such as public-key
encryption and authentication. Through the application
of our novel gate-level information flow tracking
method, we show how all flows of information can be
precisely tracked. From this foundation, we then
describe how a class of architectures can be
constructed, from the gates up, to completely capture
all information flows and we measure the impact of
doing so on the hardware implementation, the ISA, and
the programmer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Tam:2009:RAL,
author = "David K. Tam and Reza Azimi and Livio B. Soares and
Michael Stumm",
title = "{RapidMRC}: approximating {L2} miss rate curves on
commodity systems for online optimizations",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "121--132",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508259",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Miss rate curves (MRCs) are useful in a number of
contexts. In our research, online L2 cache MRCs enable
us to dynamically identify optimal cache sizes when
cache-partitioning a shared-cache multicore processor.
Obtaining L2 MRCs has generally been assumed to be
expensive when done in software and consequently, their
usage for online optimizations has been limited. To
address these problems and opportunities, we have
developed a low-overhead software technique to obtain
L2 MRCs online on current processors, exploiting
features available in their performance monitoring
units so that no changes to the application source code
or binaries are required. Our technique, called
RapidMRC, requires a single probing period of roughly
221 million processor cycles (147 ms), and subsequently
124 million cycles (83 ms) to process the data. We
demonstrate its accuracy by comparing the obtained MRCs
to the actual L2 MRCs of 30 applications taken from
SPECcpu2006, SPECcpu2000, and SPECjbb2000. We show that
RapidMRC can be applied to sizing cache partitions,
helping to achieve performance improvements of up to
27\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Eyerman:2009:PTC,
author = "Stijn Eyerman and Lieven Eeckhout",
title = "Per-thread cycle accounting in {SMT} processors",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "133--144",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508260",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper proposes a cycle accounting architecture
for Simultaneous Multithreading (SMT) processors that
estimates the execution times for each of the threads
had they been executed alone, while they are running
simultaneously on the SMT processor. This is done by
accounting each cycle to either a base, miss event or
waiting cycle component during multi-threaded
execution. Single-threaded alone execution time is then
estimated as the sum of the base and miss event
components; the waiting cycle component represents the
lost cycle count due to SMT execution. The cycle
accounting architecture incurs reasonable hardware cost
(around 1KB of storage) and estimates single-threaded
performance with average prediction errors around 7.2\%
for two-program workloads and 11.7\% for four-program
workloads. The cycle accounting architecture has
several important applications to system software and
its interaction with SMT hardware. For one, the
estimated single-thread alone execution time provides
an accurate picture to system software of the actually
consumed processor cycles per thread. The alone
execution time instead of the total execution time
(timeslice) may make system software scheduling
policies more effective. Second, a new class of
thread-progress aware SMT fetch policies based on
per-thread progress indicators enable system software
level priorities to be enforced at the hardware
level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Hofmann:2009:MBM,
author = "Owen S. Hofmann and Christopher J. Rossbach and Emmett
Witchel",
title = "Maximum benefit from a minimal {HTM}",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "145--156",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508262",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A minimal, bounded hardware transactional memory
implementation significantly improves synchronization
performance when used in an operating system kernel. We
add HTM to Linux 2.4, a kernel with a simple,
coarse-grained synchronization structure. The
transactional Linux 2.4 kernel can improve performance
of user programs by as much as 40\% over the
non-transactional 2.4 kernel. It closes 68\% of the
performance gap with the Linux 2.6 kernel, which has
had significant engineering effort applied to improve
scalability. We then extend our minimal HTM to a fast,
unbounded transactional memory with a novel technique
for coordinating hardware transactions and software
synchronization. Overflowed transactions run in
software, with only a minimal coupling between hardware
and software systems. There is no performance penalty
for overflow rates of less than 1\%. In one instance,
at 16 processors and an overflow rate of 4\%,
performance degrades from an ideal 4.3x to 3.6x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Dice:2009:EEC,
author = "Dave Dice and Yossi Lev and Mark Moir and Daniel
Nussbaum",
title = "Early experience with a commercial hardware
transactional memory implementation",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "157--168",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508263",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We report on our experience with the hardware
transactional memory (HTM) feature of two
pre-production revisions of a new commercial multicore
processor. Our experience includes a number of
promising results using HTM to improve performance in a
variety of contexts, and also identifies some ways in
which the feature could be improved to make it even
better. We give detailed accounts of our experiences,
sharing techniques we used to achieve the results we
have, as well as describing challenges we faced in
doing so.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Wells:2009:MMM,
author = "Philip M. Wells and Koushik Chakraborty and Gurindar
S. Sohi",
title = "Mixed-mode multicore reliability",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "169--180",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508265",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Future processors are expected to observe increasing
rates of hardware faults. Using Dual-Modular Redundancy
(DMR), two cores of a multicore can be loosely coupled
to redundantly execute a single software thread,
providing very high coverage from many different
sources of faults. This reliability, however, comes at
a high price in terms of per-thread IPC and overall
system throughput. We make the observation that a user
may want to run both applications requiring high
reliability, such as financial software, and more fault
tolerant applications requiring high performance, such
as media or web software, on the same machine at the
same time. Yet a traditional DMR system must fully
operate in redundant mode whenever any application
requires high reliability. This paper proposes a
Mixed-Mode Multicore (MMM), which enables most
applications, including the system software, to run
with high reliability in DMR mode, while applications
that need high performance can avoid the penalty of
DMR. Though conceptually simple, two key challenges
arise: (1) care must be taken to protect reliable
applications from any faults occurring to applications
running in high performance mode, and (2) the desire to
execute additional independent software threads for a
performance application complicates the scheduling of
computation to cores. After solving these issues, an
MMM is shown to improve overall system performance,
compared to a traditional DMR system, by approximately
2X when one reliable and one performance application
are concurrently executing.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Rajamani:2009:IDE,
author = "Sriram Rajamani and G. Ramalingam and Venkatesh Prasad
Ranganath and Kapil Vaswani",
title = "{ISOLATOR}: dynamically ensuring isolation in
concurrent programs",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "181--192",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508266",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we focus on concurrent programs that
use locks to achieve isolation of data accessed by
critical sections of code. We present ISOLATOR, an
algorithm that guarantees isolation for well-behaved
threads of a program that obey a locking discipline
even in the presence of ill-behaved threads that
disobey the locking discipline. ISOLATOR uses code
instrumentation, data replication, and virtual memory
protection to detect isolation violations and delays
ill-behaved threads to ensure isolation. Our
instrumentation scheme requires access only to the code
of well-behaved threads. We have evaluated ISOLATOR on
several benchmark programs and found that ISOLATOR can
ensure isolation with reasonable runtime overheads. In
addition, we present three general desiderata ---
safety, isolation, and permissiveness --- for any
scheme that attempts to ensure isolation, and formally
prove that ISOLATOR satisfies all of these
desiderata.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Tucek:2009:EOV,
author = "Joseph Tucek and Weiwei Xiong and Yuanyuan Zhou",
title = "Efficient online validation with delta execution",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "193--204",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508267",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software systems are constantly changing. Patches to
fix bugs and patches to add features are all too
common. Every change risks breaking a previously
working system. Hence administrators loathe change, and
are willing to delay even critical security patches
until after fully validating their correctness.
Compared to off-line validation, on-line validation has
clear advantages since it tests against real life
workloads. Yet unfortunately it imposes restrictive
overheads as it requires running the old and new
versions side-by-side. Moreover, due to spurious
differences (e.g. event timing, random number
generation, and thread interleavings), it is difficult
to compare the two for validation. To allow more
effective on-line patch validation, we propose a new
mechanism, called delta execution, that is based on the
observation that most patches are small. Delta
execution merges the two side-by-side executions for
most of the time and splits only when necessary, such
as when they access different data or execute different
code. This allows us to perform on-line validation not
only with lower overhead but also with greatly reduced
spurious differences, allowing us to effectively
validate changes. We first validate the feasibility of
our idea by studying the characteristics of 240 patches
from 4 server programs; our examination shows that 77\%
of the changes should not be expected to cause large
changes and are thereby feasible for Delta execution.
We then implemented Delta execution using dynamic
instrumentation. Using real world patches from 7 server
applications and 3 other programs, we compared our
implementation of Delta execution against a traditional
side-by-side on-line validation. Delta execution
outperformed traditional validation by up to 128\%;
further, for 3 of the changes, spurious differences
caused the traditional validation to fail completely
while Delta execution succeeded. This demonstrates that
Delta execution can allow administrators to use on-line
validation to confidently ensure the correctness of the
changes they apply.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Meisner:2009:PES,
author = "David Meisner and Brian T. Gold and Thomas F.
Wenisch",
title = "{PowerNap}: eliminating server idle power",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "205--216",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508269",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data center power consumption is growing to
unprecedented levels: the EPA estimates U.S. data
centers will consume 100 billion kilowatt hours
annually by 2011. Much of this energy is wasted in idle
systems: in typical deployments, server utilization is
below 30\%, but idle servers still consume 60\% of
their peak power draw. Typical idle periods --- though
frequent --- last seconds or less, confounding simple
energy-conservation approaches. In this paper, we
propose PowerNap, an energy-conservation approach where
the entire system transitions rapidly between a
high-performance active state and a near-zero-power
idle state in response to instantaneous load. Rather
than requiring fine-grained power-performance states
and complex load-proportional operation from each
system component, PowerNap instead calls for minimizing
idle power and transition time, which are simpler
optimization goals. Based on the PowerNap concept, we
develop requirements and outline mechanisms to
eliminate idle power waste in enterprise blade servers.
Because PowerNap operates in low-efficiency regions of
current blade center power supplies, we introduce the
Redundant Array for Inexpensive Load Sharing (RAILS), a
power provisioning approach that provides high
conversion efficiency across the entire range of
PowerNap's power demands. Using utilization traces
collected from enterprise-scale commercial deployments,
we demonstrate that, together, PowerNap and RAILS
reduce average server power consumption by 74\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Caulfield:2009:GUF,
author = "Adrian M. Caulfield and Laura M. Grupp and Steven
Swanson",
title = "{Gordon}: using flash memory to build fast,
power-efficient clusters for data-intensive
applications",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "217--228",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508270",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As our society becomes more information-driven, we
have begun to amass data at an astounding and
accelerating rate. At the same time, power concerns
have made it difficult to bring the necessary
processing power to bear on querying, processing, and
understanding this data. We describe Gordon, a system
architecture for data-centric applications that
combines low-power processors, flash memory, and
data-centric programming systems to improve performance
for data-centric applications while reducing power
consumption. The paper presents an exhaustive analysis
of the design space of Gordon systems, focusing on the
trade-offs between power, energy, and performance that
Gordon must make. It analyzes the impact of
flash-storage and the Gordon architecture on the
performance and power efficiency of data-centric
applications. It also describes a novel flash
translation layer tailored to data intensive workloads
and large flash storage arrays. Our data show that,
using technologies available in the near future, Gordon
systems can out-perform disk-based clusters by 1.5$
\times $ and deliver up to 2.5$ \times $ more
performance per Watt.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Gupta:2009:DFT,
author = "Aayush Gupta and Youngjae Kim and Bhuvan Urgaonkar",
title = "{DFTL}: a flash translation layer employing
demand-based selective caching of page-level address
mappings",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "229--240",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508271",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent technological advances in the development of
flash-memory based devices have consolidated their
leadership position as the preferred storage media in
the embedded systems market and opened new vistas for
deployment in enterprise-scale storage systems. Unlike
hard disks, flash devices are free from any mechanical
moving parts, have no seek or rotational delays and
consume lower power. However, the internal
idiosyncrasies of flash technology make its performance
highly dependent on workload characteristics. The poor
performance of random writes has been a cause of major
concern, which needs to be addressed to better utilize
the potential of flash in enterprise-scale
environments. We examine one of the important causes of
this poor performance: the design of the Flash
Translation Layer (FTL), which performs the
virtual-to-physical address translations and hides the
erase-before-write characteristics of flash. We propose
a complete paradigm shift in the design of the core FTL
engine from the existing techniques with our
Demand-based Flash Translation Layer (DFTL), which
selectively caches page-level address mappings. We
develop a flash simulation framework called FlashSim.
Our experimental evaluation with realistic
enterprise-scale workloads endorses the utility of DFTL
in enterprise-scale storage systems by demonstrating:
(i) improved performance, (ii) reduced garbage
collection overhead and (iii) better overload behavior
compared to state-of-the-art FTL schemes. For example,
a predominantly random-write dominant I/O trace from an
OLTP application running at a large financial
institution shows a 78\% improvement in average
response time (due to a 3-fold reduction in operations
of the garbage collector), compared to a
state-of-the-art FTL scheme. Even for the well-known
read-dominant TPC-H benchmark, for which DFTL
introduces additional overheads, we improve system
response time by 56\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Aleen:2009:CAS,
author = "Farhana Aleen and Nathan Clark",
title = "Commutativity analysis for software parallelization:
letting program transformations see the big picture",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "241--252",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508273",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Extracting performance from many-core architectures
requires software engineers to create multi-threaded
applications, which significantly complicates the
already daunting task of software development. One
solution to this problem is automatic compile-time
parallelization, which can ease the burden on software
developers in many situations. Clearly, automatic
parallelization in its present form is not suitable for
many application domains and new compiler analyses are
needed to address its shortcomings. In this paper, we
present one such analysis: a new approach for detecting
commutative functions. Commutative functions are
sections of code that can be executed in any order
without affecting the outcome of the application, e.g.,
inserting elements into a set. Previous research on
this topic had one significant limitation, in that the
results of commutative functions must produce
identical memory layouts. This prevented previous
techniques from detecting functions like malloc, which
may return different pointers depending on the order in
which it is called, but these differing results do not
affect the overall output of the application. Our new
commutativity analysis correctly identifies these
situations to better facilitate automatic
parallelization. We demonstrate that this analysis can
automatically extract significant amounts of
parallelism from many applications, and where it is
ineffective it can provide software developers a useful
list of functions that may be commutative provided
semantic program changes that are not automatable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Suleman:2009:ACS,
author = "M. Aater Suleman and Onur Mutlu and Moinuddin K.
Qureshi and Yale N. Patt",
title = "Accelerating critical section execution with
asymmetric multi-core architectures",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "253--264",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508274",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To improve the performance of a single application on
Chip Multiprocessors (CMPs), the application must be
split into threads which execute concurrently on
multiple cores. In multi-threaded applications,
critical sections are used to ensure that only one
thread accesses shared data at any given time. Critical
sections can serialize the execution of threads, which
significantly reduces performance and scalability. This
paper proposes Accelerated Critical Sections (ACS), a
technique that leverages the high-performance core(s)
of an Asymmetric Chip Multiprocessor (ACMP) to
accelerate the execution of critical sections. In ACS,
selected critical sections are executed by a
high-performance core, which can execute the critical
section faster than the other, smaller cores. As a
result, ACS reduces serialization: it lowers the
likelihood of threads waiting for a critical section to
finish. Our evaluation on a set of 12
critical-section-intensive workloads shows that ACS
reduces the average execution time by 34\% compared to
an equal-area 32T-core symmetric CMP and by 23\%
compared to an equal-area ACMP. Moreover, for 7 out of
the 12 workloads, ACS improves scalability by
increasing the number of threads at which performance
saturates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Mytkowicz:2009:PWD,
author = "Todd Mytkowicz and Amer Diwan and Matthias Hauswirth
and Peter F. Sweeney",
title = "Producing wrong data without doing anything obviously
wrong!",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "265--276",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508275",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a surprising result: changing a
seemingly innocuous aspect of an experimental setup can
cause a systems researcher to draw wrong conclusions
from an experiment. What appears to be an innocuous
aspect in the experimental setup may in fact introduce
a significant bias in an evaluation. This phenomenon is
called measurement bias in the natural and social
sciences. Our results demonstrate that measurement bias
is significant and commonplace in computer system
evaluation. By significant we mean that measurement
bias can lead to a performance analysis that either
over-states an effect or even yields an incorrect
conclusion. By commonplace we mean that measurement
bias occurs in all architectures that we tried (Pentium
4, Core 2, and m5 O3CPU), both compilers that we tried
(gcc and Intel's C compiler), and most of the SPEC
CPU2006 C programs. Thus, we cannot ignore measurement
bias. Nevertheless, in a literature survey of 133
recent papers from ASPLOS, PACT, PLDI, and CGO, we
determined that none of the papers with experimental
results adequately consider measurement bias. Inspired
by similar problems and their solutions in other
sciences, we describe and demonstrate two methods, one
for detecting (causal analysis) and one for avoiding
(setup randomization) measurement bias.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Bond:2009:LP,
author = "Michael D. Bond and Kathryn S. McKinley",
title = "Leak pruning",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "277--288",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508277",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Managed languages improve programmer productivity with
type safety and garbage collection, which eliminate
memory errors such as dangling pointers, double frees,
and buffer overflows. However, because garbage
collection uses reachability to over-approximate live
objects, programs may still leak memory if programmers
forget to eliminate the last reference to an object
that will not be used again. Leaks slow programs by
increasing collector workload and frequency. Growing
leaks eventually crash programs. This paper introduces
leak pruning, which keeps programs running by
predicting and reclaiming leaked objects at run time.
It predicts dead objects and reclaims them based on
observing data structure usage patterns. Leak pruning
preserves semantics because it waits for heap
exhaustion before reclaiming objects and poisons
references to objects it reclaims. If the program later
tries to access a poisoned reference, the virtual
machine (VM) throws an error. We show leak pruning has
low overhead in a Java VM and evaluate it on 10 leaking
programs. Leak pruning does not help two programs,
executes five substantial programs 1.6--81X longer, and
executes three programs, including a leak in Eclipse,
for at least 24 hours. In the worst case, leak pruning
defers fatal errors. In the best case, it keeps leaky
programs running with preserved semantics and
consistent throughput.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Wegiel:2009:DPC,
author = "Michal Wegiel and Chandra Krintz",
title = "Dynamic prediction of collection yield for managed
runtimes",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "289--300",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508278",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The growth in complexity of modern systems makes it
increasingly difficult to extract high-performance. The
software stacks for such systems typically consist of
multiple layers and include managed runtime
environments (MREs). In this paper, we investigate
techniques to improve cooperation between these layers
and the hardware to increase the efficacy of automatic
memory management in MREs. General-purpose MREs
commonly implement parallel and/or concurrent garbage
collection and employ compaction to eliminate heap
fragmentation. Moreover, most systems trigger
collection based on the amount of heap a program uses.
Our analysis shows that in many cases this strategy
leads to ineffective collections that are unable to
reclaim sufficient space to justify the incurred cost.
To avoid such collections, we exploit the observation
that dead objects tend to cluster together and form
large, never-referenced, regions in the address space
that correlate well with virtual pages that have not
recently been referenced by the application. We
leverage this correlation to design a new, simple and
light-weight, yield predictor that estimates the amount
of reclaimable space in the heap using hardware page
reference bits. Our predictor allows MREs to avoid
low-yield collections and thereby improve resource
management. We integrate this predictor into three
state-of-the-art parallel compactors, implemented in
the HotSpot JVM, that represent distinct canonical heap
layouts. Our empirical evaluation, based on standard
Java benchmarks and open-source applications, indicates
that inexpensive and accurate yield prediction can
improve performance significantly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Menon:2009:TSA,
author = "Aravind Menon and Simon Schubert and Willy
Zwaenepoel",
title = "{TwinDrivers}: semi-automatic derivation of fast and
safe hypervisor network drivers from guest {OS}
drivers",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "301--312",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508279",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In a virtualized environment, device drivers are often
run inside a virtual machine (VM) rather than in the
hypervisor, for reasons of safety and reduction in
software engineering effort. Unfortunately, this
approach results in poor performance for I/O-intensive
devices such as network cards. The alternative approach
of running device drivers directly in the hypervisor
yields better performance, but results in the loss of
safety guarantees for the hypervisor and incurs
additional software engineering costs. In this paper we
present TwinDrivers, a framework which allows us to
semi-automatically create safe and efficient hypervisor
drivers from guest OS drivers. The hypervisor driver
runs directly in the hypervisor, but its data resides
completely in the driver VM address space. A Software
Virtual Memory mechanism allows the driver to access
its VM data efficiently from the hypervisor running in
any guest context, and also protects the hypervisor
from invalid memory accesses from the driver. An upcall
mechanism allows the hypervisor to largely reuse the
driver support infrastructure present in the VM. The
TwinDriver system thus combines most of the performance
benefits of hypervisor-based driver approaches with the
safety and software engineering benefits of VM-based
driver approaches. Using the TwinDrivers hypervisor
driver, we are able to improve the guest domain
networking throughput in Xen by a factor of 2.4 for
transmit workloads, and 2.1 for receive workloads, both
in CPU-scaled units, and achieve close to 64--67\% of
native Linux throughput.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Burcea:2009:PBV,
author = "Ioana Burcea and Andreas Moshovos",
title = "{Phantom-BTB}: a virtualized branch target buffer
design",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "313--324",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508281",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern processors use branch target buffers (BTBs) to
predict the target address of branches such that they
can fetch ahead in the instruction stream increasing
concurrency and performance. Ideally, BTBs would be
sufficiently large to capture the entire working set of
the application and sufficiently small for fast access
and practical on-chip dedicated storage. Depending on
the application, these requirements are at odds. This
work introduces a BTB design that accommodates large
instruction footprints without dedicating expensive
on-chip resources. In the proposed Phantom-BTB (PBTB)
design, a conventional BTB is augmented with a virtual
table that collects branch target information as the
application runs. The virtual table does not have fixed
dedicated storage. Instead, it is transparently
allocated, on demand, in the on-chip caches, at cache
line granularity. The entries in the virtual table are
proactively prefetched and installed in the dedicated
conventional BTB, thus, increasing its perceived
capacity. Experimental results with commercial
workloads under full-system simulation demonstrate that
PBTB improves IPC performance over a 1K-entry BTB by
6.9\% on average and up to 12.7\%, with a storage
overhead of only 8\%. Overall, the virtualized design
performs within 1\% of a conventional 4K-entry,
single-cycle access BTB, while the dedicated storage is
3.6 times smaller.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Ramani:2009:SSF,
author = "Karthik Ramani and Christiaan P. Gribble and Al
Davis",
title = "{StreamRay}: a stream filtering architecture for
coherent ray tracing",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "325--336",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508282",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The wide availability of commodity graphics processors
has made real-time graphics an intrinsic component of
the human/computer interface. These graphics cores
accelerate the z-buffer algorithm and provide a highly
interactive experience at a relatively low cost.
However, many applications in entertainment, science,
and industry require high quality lighting effects such
as accurate shadows, reflection, and refraction. These
effects can be difficult to achieve with z-buffer
algorithms but are straightforward to implement using
ray tracing. Although ray tracing is computationally
more complex, the algorithm exhibits excellent scaling
and parallelism properties. Nevertheless, ray tracing
memory access patterns are difficult to predict and the
parallelism speedup promise is therefore hard to
achieve. This paper highlights a novel approach to ray
tracing based on stream filtering and presents
StreamRay, a multicore wide SIMD microarchitecture that
delivers interactive frame rates of 15--32 frames/second
for scenes of high geometric complexity and exhibits
high utilization for SIMD widths ranging from eight to
16 elements. StreamRay consists of two main components:
the ray engine, which is responsible for stream
assembly and employs address generation units that
generate addresses to form large SIMD vectors, and the
filter engine, which implements the ray tracing
operations with programmable accelerators. Results
demonstrate that separating address and data processing
reduces data movement and resource contention.
Performance improves by 56\% while simultaneously
providing 11.63\% power savings per accelerator core
compared to a design which does not use separate
resources for address and data computations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Cameron:2009:ASS,
author = "Robert D. Cameron and Dan Lin",
title = "Architectural support for {SWAR} text processing with
parallel bit streams: the inductive doubling
principle",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "1",
pages = "337--348",
month = mar,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2528521.1508283",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:47:19 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Parallel bit stream algorithms exploit the SWAR (SIMD
within a register) capabilities of commodity processors
in high-performance text processing applications such
as UTF-8 to UTF-16 transcoding, XML parsing, string
search and regular expression matching. Direct
architectural support for these algorithms in future
SWAR instruction sets could further increase
performance as well as simplifying the programming
task. A set of simple SWAR instruction set extensions
are proposed for this purpose based on the principle of
systematic support for inductive doubling as an
algorithmic technique. These extensions are shown to
significantly reduce instruction count in core parallel
bit stream algorithms, often providing a 3X or better
improvement. The extensions are also shown to be useful
for SWAR programming in other application areas,
including providing a systematic treatment for
horizontal operations. An implementation model for
these extensions involves relatively simple circuitry
added to the operand fetch components in a pipelined
processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS 2009 conference proceedings.",
}
@Article{Jouppi:2009:ISI,
author = "Norman P. Jouppi and Rakesh Kumar and Dean Tullsen",
title = "Introduction to the special issue on the {2008
Workshop on Design, Analysis, and Simulation of Chip
Multiprocessors (dasCMP'08)}",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "1--1",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577131",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zeng:2009:MCA,
author = "Hui Zeng and Matt Yourst and Kanad Ghose and Dmitry
Ponomarev",
title = "{MPTLsim}: a cycle-accurate, full-system simulator for
x86-64 multicore architectures with coherent caches",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "2--9",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577132",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The introduction of multicore microprocessors in the
recent years has made it imperative to use
cycle-accurate and full-system simulators in the
architecture research community. We introduce MPTLsim, a
multicore simulator for the X86 ISA that meets this
need. MPTLsim is a uop-accurate, cycle-accurate,
full-system simulator for multicore designs based on
the X86-64 ISA. MPTLsim extends PTLsim, a publicly
available single core simulator, with a host of
additional features to support hyperthreading within a
core and multiple cores, with detailed models for
caches, on-chip interconnections and the memory data
flow. MPTLsim incorporates detailed simulation models
for cache controllers, interconnections and has
built-in implementations of a number of cache coherency
protocols.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Monchiero:2009:HSC,
author = "Matteo Monchiero and Jung Ho Ahn and Ayose Falc{\'o}n
and Daniel Ortega and Paolo Faraboschi",
title = "How to simulate 1000 cores",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "10--19",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577133",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper proposes a novel methodology to efficiently
simulate shared-memory multiprocessors composed of
hundreds of cores. The basic idea is to use
thread-level parallelism in the software system and
translate it into core-level parallelism in the
simulated world. To achieve this, we first augment an
existing full-system simulator to identify and separate
the instruction streams belonging to the different
software threads. Then, the simulator dynamically maps
each instruction flow to the corresponding core of the
target multi-core architecture, taking into account the
inherent thread synchronization of the running
applications. Our simulator allows a user to execute
any multithreaded application in a conventional
full-system simulator and evaluate the performance of
the application on a many-core hardware. We carried out
extensive simulations on the SPLASH-2 benchmark suite
and demonstrated the scalability up to 1024 cores with
limited simulation speed degradation vs. the
single-core case on a fixed workload. The results also
show that the proposed technique captures the intrinsic
behavior of the SPLASH-2 suite, even when we scale up
the number of shared-memory cores beyond the
thousand-core limit.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:2009:SPP,
author = "Jianwei Chen and Murali Annavaram and Michel Dubois",
title = "{SlackSim}: a platform for parallel simulations of
{CMPs} on {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "20--29",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577134",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The fast simulation of chip multiprocessors (CMPs)
presents a critical challenge to the architecture
research community as both industry and academia shift
their research focus to multicore design. Parallel
simulation is a technique to accelerate
microarchitecture simulation of CMPs by exploiting the
inherent parallelism of CMPs. In this paper, we explore
the simulation paradigm of simulating each core of a
target CMP in one thread and then spreading the threads
across the hardware thread contexts of a host CMP. We
implement several parallel simulation schemes using
POSIX Threads (Pthreads). We start with cycle-by-cycle
simulation and then relax the synchronization condition
in various schemes, which we call slack
simulations.\par
In slack simulations, the Pthreads simulating different
simulated cores do not synchronize after each simulated
cycle, but rather they are given some slack. The slack
is the difference in cycle between the simulated times
of any two target cores. Small slacks, such as a few
cycles, greatly improve the efficiency of parallel CMP
simulations, with no or negligible simulation error. We
have developed a simulation framework called SlackSim
to experiment with various slack simulation schemes.
Unlike previous attempts to parallelize multiprocessor
simulations on distributed memory machines, SlackSim
takes advantage of the efficient sharing of data in the
host CMP architecture.\par
We demonstrate the efficiency and accuracy of some well
known slack simulation schemes and of some new ones on
SlackSim running on a state-of-the-art CMP platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Purnaprajna:2009:RTR,
author = "Madhura Purnaprajna and Mario Porrmann and Ulrich
Rueckert",
title = "Run-time reconfigurability in embedded
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "30--37",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577135",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To meet application-specific performance demands,
architectures are predominantly redesigned and
customised. Every architectural change results in huge
overheads in design, verification, and fabrication,
which together result in prolonged time-to-market. As
an alternative, configurable architectures provide easy
adaptability to different application domains in place
of costly redesigns. To deal with application changes
and custom requirements, a method of configuring and
reusing the basic building blocks within processors is
developed. Additionally, this enables co-operative
multiprocessing. In this paper, a runtime
reconfiguration mechanism for embedded multiprocessor
architectures is proposed as a method to introduce
customisations in the post-fabrication phase. A method
of application description in conjunction with a
flexible reconfigurable multiprocessor template is
presented. Finally, the costs and benefits of this
approach are analysed for computationally intensive
algorithms used in digital signal processing. The
impact of application specific characteristics on
execution time, power consumption, and total energy
dissipation are analysed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jesshope:2009:ISM,
author = "Chris Jesshope and Mike Lankamp and Li Zhang",
title = "The implementation of an {SVP} many-core processor and
the evaluation of its memory architecture",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "38--45",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577136",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many-core processor architectures require scalable
solutions that reflect the locality and power
constraints of future generations of silicon
technology. This paper presents a many-core processor
that supports an abstract model of concurrency, based
on a Self-adaptive Virtual Processor (SVP). This
processor implements instructions, which automatically
map and schedule threads providing a code devoid of any
explicit communication. The thrust of this approach is
to produce binary code that is divorced from
implementation parameters, yet, which still gives good
performance over future generations of CMPs. A key
component of this processor architecture is the memory
system. This paper briefly presents the model and
evaluates its memory architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singh:2009:RTP,
author = "Karan Singh and Major Bhadauria and Sally A. McKee",
title = "Real time power estimation and thread scheduling via
performance counters",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "46--55",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577137",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Estimating power consumption is critical for hardware
and software developers, and of the latter,
particularly for OS programmers writing process
schedulers. However, obtaining processor and system
power consumption information can be non-trivial.
Simulators are time consuming and prone to error. Power
meters report whole-system consumption, but cannot give
per-processor or per-thread information. More intrusive
hardware instrumentation is possible, but such
solutions are usually employed while designing the
system, and are not meant for customer use.\par
Given these difficulties, plus the current availability
of some form of performance counters on virtually all
platforms (even though such counters were initially
designed for system bring-up, and not intended for
general programmer consumption), we analytically derive
functions for real-time estimation of processor and
system power consumption using performance counter data
on real hardware. Our model uses data gathered from
microbenchmarks that capture potential application
behavior. The model is independent of our test
benchmarks, and thus we expect it to be well suited for
future applications. We target chip multiprocessors,
analyzing effects of shared resources and temperature
on power estimation, leveraging our model to implement
a simple, power-aware thread scheduler. The NAS and
SPEC-OMP benchmarks shows a median error of 5.8\% and
3.9\%, respectively. SPEC 2006 shows a marginally
higher median error of 7.2\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Azizi:2009:AEC,
author = "Omid Azizi and Aqeel Mahesri and Sanjay J. Patel and
Mark Horowitz",
title = "Area-efficiency in {CMP} core design: co-optimization
of microarchitecture and physical design",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "56--65",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577138",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we examine the area-performance design
space of a processing core for a chip multiprocessor
(CMP), considering both the architectural design space
and the tradeoffs of the physical design on which the
architecture relies. We first propose a methodology for
performing an integrated optimization of both the
micro-architecture and the physical circuit design of a
microprocessor. In our approach, we use statistical and
convex fitting methods to capture a large
micro-architectural design space. We then characterize
the area-delay tradeoffs of the underlying circuits
through RTL synthesis. Finally, we establish the
relationship between the architecture and the circuits
in an integrative model, which we use to optimize the
processor. As a case study, we apply this methodology
to explore the performance-area tradeoffs in a highly
parallel accelerator architecture for visual computing
applications. Based on some early circuit tradeoff
data, our results indicate that two separate designs
are performance/area optimal for our set of benchmarks:
a simpler single-issue, 2-way multithreaded core
running at high-frequency, and a more aggressively
tuned dual-issue 4-way multithreaded design running at
a lower frequency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2009:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "2",
pages = "66--69",
month = may,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1577129.1577140",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:39 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yelick:2009:TWW,
author = "Katherine Yelick",
title = "Ten ways to waste a parallel computer",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "1--1",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555755",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As clock speed increases taper off and hardware
designers struggle to scale parallelism within a chip,
software developers and researchers must face the
challenge of writing portable software with no clear
architectural target. On the hardware side, energy
considerations will dominate many of the design
decisions, and will ultimately limit what systems and
applications can be built. This is especially true at
the high end, where the next major milestone of
exascale computing will be unattainable without major
improvements in efficiency.\par
Although hardware designers have long worried about the
efficiency of their designs, especially for
battery-operated devices, software developers in
general have not. To illustrate this point, I will
describe some of the top ways to waste time and
therefore energy waiting for communication,
synchronization, or interactions with users or other
systems. Data movement, rather than computation, is the
big consumer of energy, yet software often moves data
up and down the memory hierarchy or across a network
multiple times. At the same time, hardware designers
need to take into account the constraints of the
computational problems that will run on their systems,
as a design that is poorly matched to the computational
requirements will end up being inefficient. Drawing on
my own experience in scientific computing, I will give
examples of how to make the combination of hardware,
algorithms and software more efficient, but also
describe some of the challenges that are inherent in
the application problems we want to solve. The
community needs to take an integrated approach to the
problem, and consider how much business or science can
be done per Joule, rather than optimizing a particular
component of the system in isolation. This will require
rethinking the algorithms, programming models, and
hardware in concert, and therefore an unprecedented
level of collaboration and cooperation between hardware
and software designers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "energy; parallel computer",
}
@Article{Lee:2009:APC,
author = "Benjamin C. Lee and Engin Ipek and Onur Mutlu and Doug
Burger",
title = "Architecting phase change memory as a scalable {DRAM}
alternative",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "2--13",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555758",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory scaling is in jeopardy as charge storage and
sensing mechanisms become less reliable for prevalent
memory technologies, such as DRAM. In contrast, phase
change memory (PCM) storage relies on scalable current
and thermal mechanisms. To exploit PCM's scalability as
a DRAM alternative, PCM must be architected to address
relatively long latencies, high energy writes, and
finite endurance.\par
We propose, crafted from a fundamental understanding of
PCM technology parameters, area-neutral architectural
enhancements that address these limitations and make
PCM competitive with DRAM. A baseline PCM system is
1.6x slower and requires 2.2x more energy than a DRAM
system. Buffer reorganizations reduce this delay and
energy gap to 1.2x and 1.0x, using narrow rows to
mitigate write energy and multiple rows to improve
locality and write coalescing. Partial writes enhance
memory endurance, providing 5.6 years of lifetime.
Process scaling will further reduce PCM energy costs
and improve endurance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "DRAM alternative; endurance; energy; PCM; performance;
phase change memory; power; scalability",
}
@Article{Zhou:2009:DEE,
author = "Ping Zhou and Bo Zhao and Jun Yang and Youtao Zhang",
title = "A durable and energy efficient main memory using phase
change memory technology",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "14--23",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555759",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Using nonvolatile memories in memory hierarchy has
been investigated to reduce its energy consumption
because nonvolatile memories consume zero leakage power
in memory cells. One of the difficulties is, however,
that the endurance of most nonvolatile memory
technologies is much shorter than the conventional SRAM
and DRAM technology. This has limited its usage to only
the low levels of a memory hierarchy, e.g., disks, that
is far from the CPU.\par
In this paper, we study the use of a new type of
nonvolatile memories -- the Phase Change Memory (PCM)
as the main memory for a 3D stacked chip. The main
challenges we face are the limited PCM endurance,
longer access latencies, and higher dynamic power
compared to the conventional DRAM technology. We
propose techniques to extend the endurance of the PCM
to an average of 13 (for MLC PCM cell) to 22 (for SLC
PCM) years. We also study the design choices of
implementing PCM to achieve the best tradeoff between
energy and performance. Our design reduced the total
energy of an already low-power DRAM main memory of the
same capacity by 65\%, and energy-delay$^2$ product by
60\%. These results indicate that it is feasible to use
PCM technology in place of DRAM in the main memory for
better energy efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "endurance; low power; phase change memory",
}
@Article{Qureshi:2009:SHP,
author = "Moinuddin K. Qureshi and Vijayalakshmi Srinivasan and
Jude A. Rivers",
title = "Scalable high performance main memory system using
phase-change memory technology",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "24--33",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555760",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The memory subsystem accounts for a significant cost
and power budget of a computer system. Current
DRAM-based main memory systems are starting to hit the
power and cost limit. An alternative memory technology
that uses resistance contrast in phase-change materials
is being actively investigated in the circuits
community. {\em Phase Change Memory (PCM)\/} devices
offer more density relative to DRAM, and can help
increase main memory capacity of future systems while
remaining within the cost and power constraints.\par
In this paper, we analyze a PCM-based hybrid main
memory system using an architecture level model of PCM.
We explore the trade-offs for a main memory system
consisting of PCM storage coupled with a small DRAM
buffer. Such an architecture has the latency benefits
of DRAM and the capacity benefits of PCM. Our
evaluations for a baseline system of 16-cores with 8GB
DRAM show that, on average, PCM can reduce page faults
by 5X and provide a speedup of 3X. As PCM is projected
to have limited write endurance, we also propose simple
organizational and management solutions of the hybrid
memory that reduces the write traffic to PCM, boosting
its lifetime from 3 years to 9.7 years.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "DRAM caching; phase change memory; wear leveling",
}
@Article{Wu:2009:HCA,
author = "Xiaoxia Wu and Jian Li and Lixin Zhang and Evan
Speight and Ram Rajamony and Yuan Xie",
title = "Hybrid cache architecture with disparate memory
technologies",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "34--45",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555761",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Caching techniques have been an efficient mechanism
for mitigating the effects of the processor-memory
speed gap. Traditional multi-level SRAM-based cache
hierarchies, especially in the context of chip
multiprocessors (CMPs), present many challenges in area
requirements, core-to-cache balance, power consumption,
and design complexity. New advancements in technology
enable caches to be built from other technologies, such
as Embedded DRAM (EDRAM), Magnetic RAM (MRAM), and
Phase-change RAM (PRAM), in both 2D chips or 3D stacked
chips. Caches fabricated in these technologies offer
dramatically different power and performance
characteristics when compared with SRAM-based caches,
particularly in the areas of access latency, cell
density, and overall power consumption. In this paper,
we propose to take advantage of the best
characteristics that each technology offers, through
the use of Hybrid Cache Architecture (HCA) designs. We
discuss and evaluate two types of hybrid cache
architectures: inter cache Level HCA (LHCA), in which
the levels in a cache hierarchy can be made of
disparate memory technologies; and intra cache level or
cache Region based HCA (RHCA), where a single level of
cache can be partitioned into multiple regions, each of
a different memory technology. We have studied a number
of different HCA architectures and explored the
potential of hardware support for intra-cache data
movement and power consumption management within HCA
caches. Utilizing a full-system simulator that has been
validated against real hardware, we demonstrate that an
LHCA design can provide a geometric mean 7\% IPC
improvement over a baseline 3-level SRAM cache design
under the same area constraint across a collection of
25 workloads. A more aggressive RHCA-based design
provides 12\% IPC improvement over the baseline.
Finally, a 2-layer 3D cache stack (3DHCA) of high
density memory technology within the same chip
footprint gives 18\% IPC improvement over the baseline.
Furthermore, up to 70\% reduction in power consumption
over a baseline SRAM-only design is achieved.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "hybrid cache architecture; three-dimensional IC",
}
@Article{Suh:2009:DMR,
author = "Jinho Suh and Michel Dubois",
title = "Dynamic {MIPS} rate stabilization in out-of-order
processors",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "46--56",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555763",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Today's microprocessor cores reach high performance
levels not only by their high clock rate but also by
the concurrent execution of a large number of
instructions. Because of the relationship between power
and frequency, it becomes attractive to run an OoO
(Out-of-Order) core at a frequency lower than its
nominal frequency in the context of embedded or
real-time systems. Unfortunately, whereas OoO pipelines
have high average throughput, their highly variable and
hard-to-predict execution rate makes them unsuitable
for real-time systems with hard or even soft deadlines.
In this paper, we demonstrate that the execution time
of an OoO processor can be stable and predictable by
controlling its MIPS (Mega Instructions Per Second)
rate via a PID (Proportional, Integral, and
Differential gain) feedback controller and DVFS
(Dynamic Voltage and Frequency Scaling). The stabilized
processor uses much less power per committed
instruction, because of the reduced average frequency.
The EPI (Energy Per Instruction) is also cut by an
average of 28\% across our benchmark programs. Since a
stable MIPS rate is maintained consistently with lower
power/energy per instruction, OoO processors stabilized
by a feedback controller can realistically be deployed
in real-time systems. To demonstrate this capability we
select a subset of the MiBench benchmarks that displays
the widest execution rate variations and stabilize
their MIPS rate in the context of a 1GHz Pentium
III-like microarchitecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "embedded systems; OoO processors; real-time systems;
stabilization; variability",
}
@Article{Paolieri:2009:HSW,
author = "Marco Paolieri and Eduardo Qui{\~n}ones and Francisco
J. Cazorla and Guillem Bernat and Mateo Valero",
title = "Hardware support for {WCET} analysis of hard real-time
multicore systems",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "57--68",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555764",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The increasing demand for new functionalities in
current and future hard real-time embedded systems like
automotive, avionics and space industries is driving an
increase in the performance required in embedded
processors. Multicore processors represent a good
design solution for such systems due to their high
performance, low cost and power consumption
characteristics. However, hard real-time embedded
systems require time analyzability and current
multicore processors are less analyzable than
single-core processors due to the interferences between
different tasks when accessing shared hardware
resources. In this paper we propose a multicore
architecture with shared resources that allows the
execution of applications with hard real-time and non
hard real-time constraints at the same time, providing
time analyzability for the hard real-time tasks so that
they can meet their deadlines. Moreover our
architecture proposal provides high-performance for the
non hard real-time tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "analyzability; cache partitioning; hard real-time;
interconnection network; multicore; real-time embedded
systems; WCET",
}
@Article{Somogyi:2009:STM,
author = "Stephen Somogyi and Thomas F. Wenisch and Anastasia
Ailamaki and Babak Falsafi",
title = "Spatio-temporal memory streaming",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "69--80",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555766",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent research advocates memory streaming techniques
to alleviate the performance bottleneck caused by the
high latencies of off-chip memory accesses. Temporal
memory streaming replays previously observed miss
sequences to eliminate long chains of dependent misses.
Spatial memory streaming predicts repetitive data
layout patterns within fixed-size memory regions.
Because each technique targets a different subset of
misses, their effectiveness varies across workloads and
each leaves a significant fraction of misses
unpredicted.\par
In this paper, we propose Spatio-Temporal Memory
Streaming (STeMS) to exploit the synergy between
spatial and temporal streaming. We observe that the
order of spatial accesses repeats both within and
across regions. STeMS records and replays the temporal
sequence of region accesses and uses spatial
relationships within each region to dynamically
reconstruct a predicted total miss order. Using
trace-driven and cycle-accurate simulation across a
suite of commercial workloads, we demonstrate that with
similar implementation complexity as temporal
streaming, STeMS achieves equal or higher coverage than
spatial or temporal memory streaming alone, and
improves performance by 31\%, 3\%, and 18\% over
stride, spatial, and temporal prediction,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "prefetching; spatial correlation; temporal
correlation",
}
@Article{Diaz:2009:SCE,
author = "Pedro Diaz and Marcelo Cintra",
title = "Stream chaining: exploiting multiple levels of
correlation in data prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "81--92",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555767",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data prefetching has long been an important technique
to amortize the effects of the memory wall, and is
likely to remain so in the current era of multi-core
systems. Most prefetchers operate by identifying
patterns and correlations in the miss address stream.
Separating streams according to the memory access
instruction that generates the misses is an effective
way of filtering out spurious addresses from
predictable streams. On the other hand, by localizing
streams based on the memory access instructions, such
prefetchers both lose the complete time sequence
information of misses and can only issue prefetches for
a single memory access instruction at a time.\par
This paper proposes a novel class of prefetchers based
on the idea of linking various localized streams into
predictable chains of missing memory access
instructions such that the prefetcher can issue
prefetches along multiple streams. In this way the
prefetcher is not limited to prefetching deeply for a
single missing memory access instruction but can
instead adaptively prefetch for other memory access
instructions closer in time.\par
Experimental results show that the proposed prefetcher
consistently achieves better performance than a
state-of-the-art prefetcher -- 10\% on average, being
only outperformed in very few cases and then by only
2\%, and outperforming that prefetcher by as much as
55\% -- while consuming the same amount of memory
bandwidth.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "data prefetching",
}
@Article{Powell:2009:ACS,
author = "Michael D. Powell and Arijit Biswas and Shantanu Gupta
and Shubhendu S. Mukherjee",
title = "Architectural core salvaging in a multi-core processor
for hard-error tolerance",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "93--104",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555769",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The incidence of hard errors in CPUs is a challenge
for future multicore designs due to increasing total
core area. Even if the location and nature of hard
errors are known a priori, either at manufacture-time
or in the field, cores with such errors must be
disabled in the absence of hard-error tolerance. While
caches, with their regular and repetitive structures,
are easily covered against hard errors by providing
spare arrays or spare lines, structures within a core
are neither as regular nor as repetitive. Previous work
has proposed microarchitectural core salvaging to
exploit structural redundancy within a core and
maintain functionality in the presence of hard errors.
Unfortunately microarchitectural salvaging introduces
complexity and may provide only limited coverage of
core area against hard errors due to a lack of natural
redundancy in the core.\par
This paper makes a case for architectural core
salvaging. We observe that even if some individual
cores cannot execute certain operations, a CPU die can
be instruction-set-architecture (ISA) compliant, that
is execute all of the instructions required by its ISA,
by exploiting natural cross-core redundancy. We propose
using hardware to migrate offending threads to another
core that can execute the operation. Architectural core
salvaging can cover a large core area against faults,
and be implemented by leveraging known techniques that
minimize changes to the microarchitecture. We show it
is possible to optimize architectural core salvaging
such that the performance on a faulty die approaches
that of a fault-free die---assuring significantly better
performance than core disabling for many workloads and
no worse performance than core disabling for the
remainder.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "core salvaging; hard errors; redundancy; reliability",
}
@Article{Carretero:2009:EER,
author = "Javier Carretero and Pedro Chaparro and Xavier Vera
and Jaume Abella and Antonio Gonz{\'a}lez",
title = "End-to-end register data-flow continuous self-test",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "105--115",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555770",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "While Moore's Law predicts the ability of
semi-conductor industry to engineer smaller and more
efficient transistors and circuits, there are serious
issues not contemplated in that law. One concern is the
verification effort of modern computing systems, which
has grown to dominate the cost of system design. On the
other hand, technology scaling leads to burn-in phase
out. As a result, in-the-field error rate may increase
due to both actual errors and latent defects. Whereas
data can be protected with arithmetic codes (like
parity or ECC), there is a lack of cost-effective
mechanisms for control logic.\par
This paper presents a light-weight microarchitectural
mechanism that ensures that data consumed through
registers are correct. Microarchitecture presents a new
way to manage reliability and testing without
significantly sacrificing cost and performance,
offering a unique opportunity to detect errors in the
field at low cost. Our results show a coverage around
90\% for the targeted structures with a cost in power
and area of about 4\%. The structures protected include
the issue queue logic and the data associated (i.e.,
tags, control signals), input multiplexors, rename
data, replay logic, register free list, bypasses data
and logic, MOB data and addresses, register file logic,
register file storage and functional units.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "control logic; degradation; design errors; end-to-end
protection; online testing",
}
@Article{Yoon:2009:MME,
author = "Doe Hyun Yoon and Mattan Erez",
title = "Memory mapped {ECC}: low-cost error protection for
last level caches",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "116--127",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555771",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a novel technique, Memory Mapped
ECC, which reduces the cost of providing error
correction for SRAM caches. It is important to limit
such overheads as processor resources become
constrained and error propensity increases. The
continuing decrease in SRAM cell size and the growing
capacity of caches increases the likelihood of errors
in SRAM arrays. To address this, redundant information
can be used to correct a value after an error occurs.
Information redundancy is typically provided through
error-correcting codes (ECC), which append bits to
every SRAM row and increase the array's area and energy
consumption. We make three observations regarding error
protection and utilize them in our architecture: (1)
much of the data in a cache is replicated throughout
the hierarchy and is inherently redundant; (2)
error-detection is necessary for every cache access and
is cheaper than error correction, which is very
infrequent; (3) redundant information for correction
need not be stored in high-cost SRAM. Our unique
architecture only dedicates SRAM for error detection
while the ECC bits are stored within the memory
hierarchy as data. We associate a physical memory
address with each cache line for ECC storage and rely
on locality to minimize the impact. The cache is
dynamically and transparently partitioned between data
and ECC with the fraction of ECC growing with the
number of dirty cache lines. We show that this has
little impact on both performance (1.3\% average and $<$
4\%) and memory traffic (3\%) across a range of
memory-intensive applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "error correction; last-level caches; reliability; soft
error",
}
@Article{Woh:2009:AAA,
author = "Mark Woh and Sangwon Seo and Scott Mahlke and Trevor
Mudge and Chaitali Chakrabarti and Krisztian Flautner",
title = "{AnySP}: anytime anywhere anyway signal processing",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "128--139",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555773",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the past decade, the proliferation of mobile
devices has increased at a spectacular rate. There are
now more than 3.3 billion active cell phones in the
world---a device that we now all depend on in our daily
lives. The current generation of devices employs a
combination of general-purpose processors, digital
signal processors, and hardwired accelerators to
provide giga-operations-per-second performance on
milliwatt power budgets. Such heterogeneous
organizations are inefficient to build and maintain, as
well as waste silicon area and power. Looking forward
to the next generation of mobile computing, computation
requirements will increase by one to three orders of
magnitude due to higher data rates, increased
complexity algorithms, and greater computation
diversity but the power requirements will be just as
stringent. Scaling of existing approaches will not
suffice; instead the inherent computational efficiency,
programmability, and adaptability of the hardware must
change. To overcome these challenges, this paper
proposes an example architecture, referred to as AnySP,
for the next generation mobile signal processing. AnySP
uses a co-design approach where the next generation
wireless signal processing and high-definition video
algorithms are analyzed to create a domain specific
programmable architecture. At the heart of AnySP is a
configurable single-instruction multiple-data datapath
that is capable of processing wide vectors or multiple
narrow vectors simultaneously. In addition, deeper
computation subgraphs can be pipelined across the
single-instruction multiple-data lanes. These three
operating modes provide high throughput across varying
application types. Results show that AnySP is capable
of sustaining 4G wireless processing and
high-definition video throughput rates, and will
approach the 1000 Mops/mW efficiency barrier when
scaled to 45nm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "fully programmable architecture; high-end signal
processing; low-power architecture; SIMD;
single-instruction multiple-data parallelism; software
defined radio",
}
@Article{Kelm:2009:RAS,
author = "John H. Kelm and Daniel R. Johnson and Matthew R.
Johnson and Neal C. Crago and William Tuohy and Aqeel
Mahesri and Steven S. Lumetta and Matthew I. Frank and
Sanjay J. Patel",
title = "{Rigel}: an architecture and scalable programming
interface for a 1000-core accelerator",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "140--151",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555774",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper considers Rigel, a programmable accelerator
architecture for a broad class of data- and
task-parallel computation. Rigel comprises 1000+
hierarchically-organized cores that use a fine-grained,
dynamically scheduled single-program, multiple-data
(SPMD) execution model. Rigel's low-level programming
interface adopts a single global address space model
where parallel work is expressed in a task-centric,
bulk-synchronized manner using minimal hardware
support. Compared to existing accelerators, which
contain domain-specific hardware, specialized memories,
and/or restrictive programming models, Rigel is more
flexible and provides a straightforward target for a
broader set of applications.\par
We perform a design analysis of Rigel to quantify the
compute density and power efficiency of our initial
design. We find that Rigel can achieve a density of
over 8 single-precision GFLOPS/mm$^2$ in 45nm, which is
comparable to high-end GPUs scaled to 45nm. We perform
experimental analysis on several applications ported to
the Rigel low-level programming interface. We examine
scalability issues related to work distribution,
synchronization, and load-balancing for 1000-core
accelerators using software techniques and minimal
specialized hardware support. We find that while it is
important to support fast task distribution and barrier
operations, these operations can be implemented without
specialized hardware using flexible hardware
primitives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "accelerator; computer architecture; low-level
programming interface",
}
@Article{Hong:2009:AMG,
author = "Sunpyo Hong and Hyesoon Kim",
title = "An analytical model for a {GPU} architecture with
memory-level and thread-level parallelism awareness",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "152--163",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555775",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "GPU architectures are increasingly important in the
multi-core era due to their high number of parallel
processors. Programming thousands of massively parallel
threads is a big challenge for software engineers, but
understanding the performance bottlenecks of those
parallel programs on GPU architectures to improve
application performance is even more difficult. Current
approaches rely on programmers to tune their
applications by exploiting the design space
exhaustively without fully understanding the
performance characteristics of their
applications.\par
To provide insights into the performance bottlenecks of
parallel applications on GPU architectures, we propose
a simple analytical model that estimates the execution
time of massively parallel programs. The key component
of our model is estimating the number of parallel
memory requests (we call this the memory warp
parallelism) by considering the number of running
threads and memory bandwidth. Based on the degree of
memory warp parallelism, the model estimates the cost
of memory requests, thereby estimating the overall
execution time of a program. Comparisons between the
outcome of the model and the actual execution time in
several GPUs show that the geometric mean of absolute
error of our model on micro-benchmarks is 5.4\% and on
GPU computing applications is 13.3\%. All the
applications are written in the CUDA programming
language.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "analytical model; CUDA; GPU architecture; memory level
parallelism; performance estimation; warp level
parallelism",
}
@Article{Biswas:2009:MEM,
author = "Susmit Biswas and Diana Franklin and Alan Savage and
Ryan Dixon and Timothy Sherwood and Frederic T. Chong",
title = "Multi-execution: multicore caching for data-similar
executions",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "164--173",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555777",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "While microprocessor designers turn to multicore
architectures to sustain performance expectations, the
dramatic increase in parallelism of such architectures
will put substantial demands on off-chip bandwidth and
make the memory wall more significant than ever. This
paper demonstrates that one profitable application of
multicore processors is the execution of many similar
instantiations of the same program. We identify that
this model of execution is used in several practical
scenarios and term it as `multi-execution.' Often, each
such instance utilizes very similar data. In
conventional cache hierarchies, each instance would
cache its own data independently. We propose the
Mergeable cache architecture that detects data
similarities and merges cache blocks, resulting in
substantial savings in cache storage requirements. This
leads to reductions in off-chip memory accesses and
overall power usage, and increases in application
performance. We present cycle-accurate simulation
results of 8 benchmarks (6 from SPEC2000) to
demonstrate that our technique provides a scalable
solution and leads to significant speedups due to
reductions in main memory accesses. For 8 cores running
8 similar executions of the same application and
sharing an exclusive 4-MB, 8-way L2 cache, the
Mergeable cache shows a speedup in execution by 2.5x on
average (ranging from 0.93x to 6.92x), while posing an
overhead of only 4.28\% on cache area and 5.21\% on
power when it is used.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "CMP; data similar execution; multicore cache design",
}
@Article{Xie:2009:PPI,
author = "Yuejian Xie and Gabriel H. Loh",
title = "{PIPP}: promotion\slash insertion pseudo-partitioning
of multi-core shared caches",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "174--183",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555778",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many multi-core processors employ a large last-level
cache (LLC) shared among the multiple cores. Past
research has demonstrated that sharing-oblivious cache
management policies (e.g., LRU) can lead to poor
performance and fairness when the multiple cores
compete for the limited LLC capacity. Different memory
access patterns can cause cache contention in different
ways, and various techniques have been proposed to
target some of these behaviors. In this work, we
propose a new cache management approach that combines
dynamic insertion and promotion policies to provide the
benefits of cache partitioning, adaptive insertion, and
capacity stealing all with a single mechanism. By
handling multiple types of memory behaviors, our
proposed technique outperforms techniques that target
only either capacity partitioning or adaptive
insertion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache; contention; insertion; multi-core; promotion;
sharing",
}
@Article{Hardavellas:2009:RNN,
author = "Nikos Hardavellas and Michael Ferdman and Babak
Falsafi and Anastasia Ailamaki",
title = "{Reactive NUCA}: near-optimal block placement and
replication in distributed caches",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "184--195",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555779",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Increases in on-chip communication delay and the large
working sets of server and scientific workloads
complicate the design of the on-chip last-level cache
for multicore processors. The large working sets favor
a shared cache design that maximizes the aggregate
cache capacity and minimizes off-chip memory requests.
At the same time, the growing on-chip communication
delay favors core-private caches that replicate data to
minimize delays on global wires. Recent hybrid
proposals offer lower average latency than conventional
designs, but they address the placement requirements of
only a subset of the data accessed by the application,
require complex lookup and coherence mechanisms that
increase latency, or fail to scale to high core
counts.\par
In this work, we observe that the cache access patterns
of a range of server and scientific workloads can be
classified into distinct classes, where each class is
amenable to different block placement policies. Based
on this observation, we propose Reactive NUCA (R-NUCA),
a distributed cache design which reacts to the class of
each cache access and places blocks at the appropriate
location in the cache. R-NUCA cooperates with the
operating system to support intelligent placement,
migration, and replication without the overhead of an
explicit coherence mechanism for the on-chip last-level
cache. In a range of server, scientific, and
multiprogrammed workloads, R-NUCA matches the
performance of the best cache design for each workload,
improving performance by 14\% on average over competing
designs and by 32\% at best, while achieving
performance within 5\% of an ideal cache design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "block migration; block placement; block replication;
cache; cache coherence; cache indexing; cache lookup;
cache management; chip multiprocessor; cmp; coherence;
data migration; data placement; data replication;
interleaving; last-level cache; lookup; migration;
multi-core; multicore; non-uniform cache access; NUCA;
placement; private cache; R-NUCA; Reactive NUCA;
replication; rotational interleaving; shared cache",
}
@Article{Moscibroda:2009:CBR,
  author =       {Thomas Moscibroda and Onur Mutlu},
  title =        {A case for bufferless routing in on-chip networks},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {196--207},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555781},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Buffers in on-chip networks consume significant
                 energy, occupy chip area, and increase design
                 complexity. In this paper, we make a case for a new
                 approach to designing on-chip interconnection networks
                 that eliminates the need for buffers for routing or
                 flow control. We describe new algorithms for routing
                 without using buffers in router input/output ports. We
                 analyze the advantages and disadvantages of bufferless
                 routing and discuss how router latency can be reduced
                 by taking advantage of the fact that input/output
                 buffers do not exist. Our evaluations show that routing
                 without buffers significantly reduces the energy
                 consumption of the on-chip cache/processor-to-cache
                 network, while providing similar performance to that of
                 existing buffered routing algorithms at low network
                 utilization (i.e., on most real applications). We
                 conclude that bufferless routing can be an attractive
                 and energy-efficient design option for on-chip
                 cache/processor-to-cache networks where network
                 utilization is low.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {memory systems; multi-core; on-chip networks;
                 routing},
}
@Article{Kinsy:2009:AAD,
  author =       {Michel A. Kinsy and Myong Hyon Cho and Tina Wen and
                 Edward Suh and Marten van Dijk and Srinivas Devadas},
  title =        {Application-aware deadlock-free oblivious routing},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {208--219},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555782},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Conventional oblivious routing algorithms are either
                 not application-aware or assume that each flow has its
                 own private channel to ensure deadlock avoidance. We
                 present a framework for application-aware routing that
                 assures deadlock-freedom under one or more channels by
                 forcing routes to conform to an acyclic channel
                 dependence graph. Arbitrary minimal routes can be made
                 deadlock-free through appropriate static channel
                 allocation when two or more channels are available.
                 Given bandwidth estimates for flows, we present a mixed
                 integer-linear programming (MILP) approach and a
                 heuristic approach for producing deadlock-free routes
                 that minimize maximum channel load. The heuristic
                 algorithm is calibrated using the MILP algorithm and
                 evaluated on a number of benchmarks through detailed
                 network simulation. Our framework can be used to
                 produce application-aware routes that target the
                 minimization of latency, number of flows through a
                 link, bandwidth, or any combination thereof.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {oblivious routing; on-chip interconnection networks;
                 systems-on-chip},
}
@Article{Jiang:2009:IAR,
author = "Nan Jiang and John Kim and William J. Dally",
title = "Indirect adaptive routing on large scale
interconnection networks",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "220--231",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555783",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recently proposed high-radix interconnection networks
[10] require global adaptive routing to achieve optimum
performance. Existing direct adaptive routing methods
are slow to sense congestion remote from the source
router and hence misroute many packets before such
congestion is detected. This paper introduces indirect
global adaptive routing (IAR) in which the adaptive
routing decision uses information that is not directly
available at the source router. We describe four IAR
routing methods: credit round trip (CRT) [10],
progressive adaptive routing (PAR), piggyback routing
(PB), and reservation routing (RES). We evaluate each
of these methods on the dragonfly topology under both
steady-state and transient loads. Our results show that
PB, PAR, and CRT all achieve good performance. PB
provides the best absolute performance, with 2--7\%
lower latency on steady-state uniform random traffic at
70\% load, while PAR provides the fastest response on
transient loads. We also evaluate the implementation
costs of the indirect adaptive routing methods and show
that PB has the lowest implementation cost requiring
$<$1\% increase in the total storage of a typical
high-radix router.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dragonfly; interconnection networks; routing",
}
@Article{Hamilton:2009:ISS,
  author =       {James Hamilton},
  title =        {{Internet}-scale service infrastructure efficiency},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {232--232},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555756},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {High-scale cloud services provide economies of scale
                 of five to ten over small-scale deployments, and are
                 becoming a large part of both enterprise information
                 processing and consumer services. Even very large
                 enterprise IT deployments have quite different cost
                 drivers and optimizations points from internet-scale
                 services. The former are people-dominated from a cost
                 perspective whereas internet-scale service costs are
                 driven by server hardware and infrastructure with
                 people costs fading into the noise at less than
                 10\%.\par
                 In this talk we inventory where the infrastructure
                 costs are in internet-scale services. We track power
                 distribution from 115KV at the property line through
                 all conversions into the data center tracking the
                 losses to final delivery at semiconductor voltage
                 levels. We track cooling and all the energy conversions
                 from power dissipation through release to the
                 environment outside of the building. Understanding
                 where the costs and inefficiencies lie, we'll look more
                 closely at cooling and overall mechanical system
                 design, server hardware design, and software techniques
                 including graceful degradation mode, power yield
                 management, and resource consumption shaping.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {efficiency; Internet-scale},
}
@Article{Blundell:2009:IPT,
  author =       {Colin Blundell and Milo M. K. Martin and Thomas F.
                 Wenisch},
  title =        {{InvisiFence}: performance-transparent memory ordering
                 in conventional multiprocessors},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {233--244},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555754.1555785},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {A multiprocessor's memory consistency model imposes
                 ordering constraints among loads, stores, atomic
                 operations, and memory fences. Even for consistency
                 models that relax ordering among loads and stores,
                 ordering constraints still induce significant
                 performance penalties due to atomic operations and
                 memory ordering fences. Several prior proposals reduce
                 the performance penalty of strongly ordered models
                 using post-retirement speculation, but these designs
                 either (1) maintain speculative state at a per-store
                 granularity, causing storage requirements to grow
                 proportionally to speculation depth, or (2) employ
                 distributed global commit arbitration using
                 unconventional chunk-based invalidation mechanisms. In
                 this paper we propose InvisiFence, an approach for
                 implementing memory ordering based on post-retirement
                 speculation that avoids these concerns. InvisiFence
                 leverages minimalistic mechanisms for post-retirement
                 speculation proposed in other contexts to (1) track
                 speculative state efficiently at block-granularity with
                 dedicated storage requirements independent of
                 speculation depth, (2) provide fast commit by avoiding
                 explicit commit arbitration, and (3) operate under a
                 conventional invalidation-based cache coherence
                 protocol. InvisiFence supports both modes of operation
                 found in prior work: speculating only when necessary to
                 minimize the risk of rollback-inducing violations or
                 speculating continuously to decouple consistency
                 enforcement from the processor core. Overall,
                 InvisiFence requires approximately one kilobyte of
                 additional state to transform a conventional
                 multiprocessor into one that provides
                 performance-transparent memory ordering, fences, and
                 atomic operations.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {memory consistency; parallel programming},
}
@Article{Hilton:2009:DSC,
  author =       {Andrew Hilton and Amir Roth},
  title =        {Decoupled store completion\slash silent deterministic
                 replay: enabling scalable data memory for {CPR\slash
                 CFP} processors},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {245--254},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555786},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {CPR/CFP (Checkpoint Processing and Recovery/Continual
                 Flow Pipeline) support an adaptive instruction window
                 that scales to tolerate last-level cache misses.
                 CPR/CFP scale the register file by aggressively
                 reclaiming the destination registers of many in-flight
                 instructions. However, an analogous mechanism does not
                 exist for stores and loads. As the window expands,
                 CPR/CFP processors must track all in-flight stores and
                 loads to support forwarding and detect memory ordering
                 violations.\par
                 The previously-described SVW (Store Vulnerability
                 Window) and SQIP (Store Queue Index Prediction) schemes
                 provide scalable, non-associative load and store
                 queues, respectively. However, they don't work smoothly
                 in a CPR/CFP context. SVW/SQIP rely on the ability to
                 dynamically stall some loads until a specific older
                 store writes to the cache. Enforcing this serialization
                 in CPR/CFP is expensive if the load and store are in
                 the same checkpoint.\par
                 We introduce two complementary procedures that
                 implement this serialization efficiently. Decoupled
                 Store Completion (DSC) allows stores to write to the
                 cache before the enclosing checkpoint completes
                 execution. Silent Deterministic Replay (SDR) supports
                 mis-speculation recovery in the presence of DSC by
                 replaying loads older than completed stores using
                 values from the load queue. The combination of DSC and
                 SDR enables an SVW/SQIP based CPR/CFP memory system
                 that outperforms previous designs while occupying less
                 area.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {checkpoint processors; load-store queues},
}
@Article{Zheng:2009:DDB,
  author =       {Hongzhong Zheng and Jiang Lin and Zhao Zhang and
                 Zhichun Zhu},
  title =        {Decoupled {DIMM}: building high-bandwidth memory
                 system using low-speed {DRAM} devices},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {255--266},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555754.1555788},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {The widespread use of multicore processors has
                 dramatically increased the demands on high bandwidth
                 and large capacity from memory systems. In a
                 conventional DDR2/DDR3 DRAM memory system, the memory
                 bus and DRAM devices run at the same data rate. To
                 improve memory bandwidth, we propose a new memory
                 system design called decoupled DIMM that allows the
                 memory bus to operate at a data rate much higher than
                 that of the DRAM devices. In the design, a
                 synchronization buffer is added to relay data between
                 the slow DRAM devices and the fast memory bus; and
                 memory access scheduling is revised to avoid access
                 conflicts on memory ranks. The design not only improves
                 memory bandwidth beyond what can be supported by
                 current memory devices, but also improves reliability,
                 power efficiency, and cost effectiveness by using
                 relatively slow memory devices. The idea of decoupling,
                 precisely the decoupling of bandwidth match between
                 memory bus and a single rank of devices, can also be
                 applied to other types of memory systems including
                 FB-DIMM.\par
                 Our experimental results show that a decoupled DIMM
                 system of 2667MT/s bus data rate and 1333MT/s device
                 data rate improves the performance of memory-intensive
                 workloads by 51\% on average over a conventional memory
                 system of 1333MT/s data rate. Alternatively, a
                 decoupled DIMM system of 1600MT/s bus data rate and
                 800MT/s device data rate incurs only 8\% performance
                 loss when compared with a conventional system of
                 1600MT/s data rate, with 16\% reduction on the memory
                 power consumption and 9\% saving on memory energy.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {bandwidth decoupling; decoupled DIMM; DRAM memories},
}
@Article{Lim:2009:DME,
author = "Kevin Lim and Jichuan Chang and Trevor Mudge and
Parthasarathy Ranganathan and Steven K. Reinhardt and
Thomas F. Wenisch",
title = "Disaggregated memory for expansion and sharing in
blade servers",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "267--278",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555789",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Analysis of technology and application trends reveals
a growing imbalance in the peak
compute-to-memory-capacity ratio for future servers. At
the same time, the fraction contributed by memory
systems to total datacenter costs and power consumption
during typical usage is increasing. In response to
these trends, this paper re-examines traditional
compute-memory co-location on a single system and
details the design of a new general-purpose
architectural building block---a memory blade---that
allows memory to be `disaggregated' across a system
ensemble. This remote memory blade can be used for
memory capacity expansion to improve performance and
for sharing memory across servers to reduce
provisioning and power costs. We use this memory blade
building block to propose two new system architecture
solutions---(1) page-swapped remote memory at the
virtualization layer, and (2) block-access remote
memory with support in the coherence hardware---that
enable transparent memory expansion and sharing on
commodity-based systems. Using simulations of a mix of
enterprise benchmarks supplemented with traces from
live datacenters, we demonstrate that memory
disaggregation can provide substantial performance
benefits (on average 10X) in memory constrained
environments, while the sharing enabled by our
solutions can improve performance-per-dollar by up to
57\% when optimizing memory provisioning across
multiple servers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "disaggregated memory; memory blades; memory capacity
expansion; power and cost efficiencies",
}
@Article{Dirik:2009:PPS,
author = "Cagdas Dirik and Bruce Jacob",
title = "The performance of {PC} solid-state disks {(SSDs)} as
a function of bandwidth, concurrency, device
architecture, and system organization",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "279--289",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555790",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As their prices decline, their storage capacities
increase, and their endurance improves, NAND Flash
Solid State Disks (SSD) provide an increasingly
attractive alternative to Hard Disk Drives (HDD) for
portable computing systems and PCs. This paper presents
a study of NAND Flash SSD architectures and their
management techniques, quantifying SSD performance
under user-driven/PC applications in a multi-tasked
environment; user activity represents typical PC
workloads and includes browsing files and folders,
emailing, text editing and document creation, surfing
the web, listening to music and playing movies, editing
large pictures, and running office applications.\par
We find the following: (a) the real limitation to NAND
Flash memory performance is not its low per-device
bandwidth but its internal core interface; (b) NAND
Flash memory media transfer rates do not need to scale
up to those of HDDs for good performance; (c) SSD
organizations that exploit concurrency at both the
system and device level (e.g. RAID-like organizations
and Micron-style superblocks) improve performance
significantly; and (d) these system- and device-level
concurrency mechanisms are, to a significant degree,
orthogonal: that is, the performance increase due to
one does not come at the expense of the other, as each
exploits a different facet of concurrency exhibited
within the PC workload.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "flash memory; performance; solid state disks; storage
systems",
}
@Article{Bhattacharjee:2009:TCP,
author = "Abhishek Bhattacharjee and Margaret Martonosi",
title = "Thread criticality predictors for dynamic performance,
power, and resource management in chip
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "290--301",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555792",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the shift towards chip multiprocessors (CMPs),
exploiting and managing parallelism has become a
central problem in computing systems. Many issues of
parallelism management boil down to discerning which
running threads or processes are critical, or slowest,
versus which are non-critical. If one can accurately
predict critical threads in a parallel program, then
one can respond in a variety of ways. Possibilities
include running the critical thread at a faster clock
rate, performing load balancing techniques to offload
work onto currently non-critical threads, or giving the
critical thread more on-chip resources to execute
faster.\par
This paper proposes and evaluates simple but effective
thread criticality predictors for parallel
applications. We show that accurate predictors can be
built using counters that are typically already
available on-chip. Our predictor, based on memory
hierarchy statistics, identifies thread criticality
with an average accuracy of 93\% across a range of
architectures.\par
We also demonstrate two applications of our predictor.
First, we show how Intel's Threading Building Blocks
(TBB) parallel runtime system can benefit from task
stealing techniques that use our criticality predictor
to reduce load imbalance. Using criticality prediction
to guide TBB's task-stealing decisions improves
performance by 13--32\% for TBB-based PARSEC benchmarks
running on a 32-core CMP. As a second application,
criticality prediction guides dynamic energy
optimizations in barrier-based applications. By running
the predicted critical thread at the full clock rate
and frequency-scaling non-critical threads, this
approach achieves average energy savings of 15\% while
negligibly degrading performance for SPLASH-2 and
PARSEC benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "caches; DVFS; Intel TBB; parallel processing; thread
criticality prediction",
}
@Article{Rangan:2009:TMF,
  author =       {Krishna K. Rangan and Gu-Yeon Wei and David Brooks},
  title =        {Thread motion: fine-grained power management for
                 multi-core systems},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {302--313},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555793},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Dynamic voltage and frequency scaling (DVFS) is a
                 commonly-used power-management scheme that dynamically
                 adjusts power and performance to the time-varying needs
                 of running programs. Unfortunately, conventional DVFS,
                 relying on off-chip regulators, faces limitations in
                 terms of temporal granularity and high costs when
                 considered for future multi-core systems. To overcome
                 these challenges, this paper presents thread motion
                 (TM), a fine-grained power-management scheme for chip
                 multiprocessors (CMPs). Instead of incurring the high
                 cost of changing the voltage and frequency of different
                 cores, TM enables rapid movement of threads to adapt
                 the time-varying computing needs of running
                 applications to a mixture of cores with fixed but
                 different power/performance levels. Results show that
                 for the same power budget, two voltage/frequency levels
                 are sufficient to provide performance gains
                 commensurate to idealized scenarios using per-core
                 voltage control. Thread motion extends workload-based
                 power management into the nanosecond realm and, for a
                 given power budget, provides up to 20\% better
                 performance than coarse-grained DVFS.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {DVFS; multi-core power management; thread motion},
}
@Article{Wang:2009:TCP,
  author =       {Yefu Wang and Kai Ma and Xiaorui Wang},
  title =        {Temperature-constrained power control for chip
                 multiprocessors with online model estimation},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {314--324},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555794},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {As chip multiprocessors (CMP) become the main trend in
                 processor development, various power and thermal
                 management strategies have recently been proposed to
                 optimize system performance while controlling the power
                 or temperature of a CMP chip to stay below a
                 constraint. The availability of per-core DVFS (dynamic
                 voltage and frequency scaling) also makes it possible
                 to develop advanced management strategies. However,
                 most existing solutions rely on open-loop search or
                 optimization with the assumption that power can be
                 estimated accurately, while others adopt oversimplified
                 feedback control strategies to control power and
                 temperature separately, without any theoretical
                 guarantees. In this paper, we propose a chip-level
                 power control algorithm that is systematically designed
                 based on optimal control theory. Our algorithm can
                 precisely control the power of a CMP chip to the
                 desired set point while maintaining the temperature of
                 each core below a specified threshold. Furthermore, an
                 online model estimator is designed to achieve
                 analytical assurance of control accuracy and system
                 stability, even in the face of significant workload
                 variations or unpredictable chip or core variations.
                 Empirical results on a physical testbed show that our
                 controller outperforms two state-of-the-art control
                 algorithms by having better SPEC benchmark performance
                 and more precise power control. In addition, extensive
                 simulation results demonstrate the efficacy of our
                 algorithm for various CMP configurations.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {chip multiprocessor; feedback control; power
                 management},
}
@Article{Yu:2009:CIC,
  author =       {Jie Yu and Satish Narayanasamy},
  title =        {A case for an interleaving constrained shared-memory
                 multi-processor},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {325--336},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555796},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Shared-memory multi-threaded programming is inherently
                 more difficult than single-threaded programming. The
                 main source of complexity is that, the threads of an
                 application can interleave in so many different ways.
                 To ensure correctness, a programmer has to test all
                 possible thread interleavings, which, however, is
                 impractical.\par
                 Many rare thread interleavings remain untested in
                 production systems, and they are the root cause for a
                 majority of concurrency bugs. We propose a
                 shared-memory multi-processor design that avoids
                 untested interleavings to improve the correctness of a
                 multi-threaded program. Since untested interleavings
                 tend to occur infrequently at runtime, the performance
                 cost of avoiding them is not high.\par
                 We propose to encode the set of tested correct
                 interleavings in a program's binary executable using
                 {\em Predecessor Set (PSet)\/} constraints. These
                 constraints are efficiently enforced at runtime using
                 processor support, which ensures that the runtime
                 follows a tested interleaving. We analyze several bugs
                 in open source applications such as MySQL, Apache,
                 Mozilla, etc., and show that, by enforcing PSet
                 constraints, we can avoid not only data races and
                 atomicity violations, but also other forms of
                 concurrency bugs.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {concurrency bugs; multiprocessors; parallel
                 programming; software reliability},
}
@Article{Muzahid:2009:SSB,
  author =       {Abdullah Muzahid and Dario Su{\'a}rez and Shanxiang Qi
                 and Josep Torrellas},
  title =        {{SigRace}: signature-based data race detection},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {37},
  number =       {3},
  pages =        {337--348},
  month =        jun,
  year =         {2009},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/1555815.1555797},
  ISSN =         {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L =       {0163-5964},
  bibdate =      {Tue Aug 11 18:12:55 MDT 2009},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Detecting data races in parallel programs is important
                 for both software development and production-run
                 diagnosis. Recently, there have been several proposals
                 for hardware-assisted data race detection. Such
                 proposals typically modify the L1 cache and cache
                 coherence protocol messages, and largely lose their
                 capability when lines get displaced or invalidated from
                 the cache. To eliminate these shortcomings, this paper
                 proposes a novel, different approach to
                 hardware-assisted data race detection. The approach,
                 called SigRace, relies on hardware address signatures.
                 As a processor runs, the addresses of the data that it
                 accesses are automatically encoded in signatures. At
                 certain times, the signatures are automatically passed
                 to a hardware module that intersects them with those of
                 other processors. If the intersection is not null, a
                 data race may have occurred.\par
                 This paper presents the architecture of SigRace, an
                 implementation, and its software interface. With
                 SigRace, caches and coherence protocol messages are
                 unmodified. Moreover, cache lines can be displaced and
                 invalidated with no effect. Our experiments show that
                 SigRace is significantly more effective than a
                 state-of-the-art conventional hardware-assisted race
                 detector. SigRace finds on average 29\% more static
                 races and 107\% more dynamic races. Moreover, if we
                 inject data races, SigRace finds 150\% more static
                 races than the conventional scheme.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  keywords =     {concurrency defect; data race; happened-before;
                 signature; SigRace; timestamp},
}
@Article{Nagarajan:2009:EEC,
author = "Vijay Nagarajan and Rajiv Gupta",
title = "{ECMon}: exposing cache events for monitoring",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "349--360",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555798",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The advent of multicores has introduced new challenges
for programmers to provide increased performance and
software reliability. There has been significant
interest in techniques that use software speculation to
better utilize the computational power of multicores.
At the same time, several recent proposals for ensuring
software reliability are not applicable in a multicore
setting due to their inability to handle interprocessor
shared memory dependences (ISMDs). The demands for
performing speculation and ensuring software
reliability in a multicore setting, although seemingly
different, share a common requirement: the need for
monitoring program execution and collecting
interprocessor dependence information at low overhead.
For example, an important component of speculation is
the efficient detection of misspeculation which in turn
requires dependence information. Likewise, tasks that
help ensure software reliability on multicores,
including {\em recording for replay}, require ISMD
information.\par
In this paper, we propose {\em ECMon\/}: support for
exposing cache events to the software. This enables the
programmer to catch these events and react to them; in
effect, efficiently exposing the ISMDs to the
programmer. In the context of speculation, we show how
{\em ECMon\/} optimizes the detection of
misspeculation; we use this simple support to
speculate past active barriers and achieve a speedup of
12\% for the set of parallel programs considered. As an
application of ensuring software reliability, we show
how {\em ECMon\/} can be used to record shared memory
dependences on multicores using no specialized hardware
support at only 2.8 fold execution time overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache events; recording for replay; speculation past
barriers",
}
@Article{Saidi:2009:EEP,
author = "Ali G. Saidi and Nathan L. Binkert and Steven K.
Reinhardt and Trevor Mudge",
title = "End-to-end performance forecasting: finding
bottlenecks before they happen",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "361--370",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555800",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many important workloads today, such as web-hosted
services, are limited not by processor core performance
but by interactions among the cores, the memory system,
I/O devices, and the complex software layers that tie
these components together. Architects designing future
systems for these workloads are challenged to identify
performance bottlenecks because, as in any concurrent
system, overheads in one component may be hidden due to
overlap with other operations. These overlaps span the
user/kernel and software/hardware boundaries, making
traditional performance analysis techniques
inadequate.\par
We present a methodology for identifying end-to-end
critical paths across software and simulated hardware
in complex networked systems. By modeling systems as
collections of state machines interacting via queues,
we can trace critical paths through multiplexed
processing engines, identify when resources create
bottlenecks (including abstract resources such as
flow-control credits), and predict the benefit of
eliminating bottlenecks by increasing hardware speeds
or expanding available resources.\par
We implement our technique in a full-system simulator
and analyze a TCP microbenchmark, a web server, the
Linux TCP/IP stack, and an Ethernet controller. From a
single run of the microbenchmark, our tool---within
minutes---correctly identifies a series of bottlenecks,
and predicts the performance of hypothetical systems in
which these bottlenecks are successively eliminated,
culminating in a total speedup of 3X. We then validate
these predictions through hours of additional
simulation, and find them to be accurate within
1--17\%. We also analyze the web server, find it to be
CPU-bound, and predict the performance of a system with
an additional core within 6\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "critical path analysis; performance analysis",
}
@Article{Rogers:2009:SBW,
  author          = {Brian M. Rogers and Anil Krishna and Gordon B. Bell
                     and Ken Vu and Xiaowei Jiang and Yan Solihin},
  title           = {Scaling the bandwidth wall: challenges in and avenues
                     for {CMP} scaling},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {3},
  pages           = {371--382},
  month           = jun,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1555754.1555801},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Aug 11 18:12:55 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {As transistor density continues to grow at an
                     exponential rate in accordance to Moore's law, the goal
                     for many Chip Multi-Processor (CMP) systems is to scale
                     the number of on-chip cores proportionally.
                     Unfortunately, off-chip memory bandwidth capacity is
                     projected to grow slowly compared to the desired growth
                     in the number of cores. This creates a situation in
                     which each core will have a decreasing amount of
                     off-chip bandwidth that it can use to load its data
                     from off-chip memory. The situation in which off-chip
                     bandwidth is becoming a performance and throughput
                     bottleneck is referred to as the {\em bandwidth wall\/}
                     problem.\par
                     In this study, we seek to answer two questions: (1) to
                     what extent does the bandwidth wall problem restrict
                     future multicore scaling, and (2) to what extent are
                     various bandwidth conservation techniques able to
                     mitigate this problem. To address them, we develop a
                     simple but powerful analytical model to predict the
                     number of on-chip cores that a CMP can support given a
                     limited growth in memory traffic capacity. We find that
                     the bandwidth wall can severely limit core scaling.
                     When starting with a balanced 8-core CMP, in four
                     technology generations the number of cores can only
                     scale to 24, as opposed to 128 cores under proportional
                     scaling, without increasing the memory traffic
                     requirement. We find that various individual bandwidth
                     conservation techniques we evaluate have a wide ranging
                     impact on core scaling, and when combined together,
                     these techniques have the potential to enable
                     super-proportional core scaling for up to 4 technology
                     generations.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {analytical model; chip multi-processor; memory
                     bandwidth},
}
@Article{Whitney:2009:FTA,
author = "Mark G. Whitney and Nemanja Isailovic and Yatish Patel
and John Kubiatowicz",
title = "A fault tolerant, area efficient architecture for
{Shor}'s factoring algorithm",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "383--394",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555802",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We optimize the area and latency of Shor's factoring
while simultaneously improving fault tolerance through:
(1) balancing the use of ancilla generators, (2)
aggressive optimization of error correction, and (3)
tuning the core adder circuits. Our custom CAD flow
produces detailed layouts of the physical components
and utilizes simulation to analyze circuits in terms of
area, latency, and success probability. We introduce a
metric, called ADCR, which is the probabilistic
equivalent of the classic Area-Delay product. Our error
correction optimization can reduce ADCR by order of
magnitude or more. Contrary to conventional wisdom, we
show that the area of an optimized quantum circuit is
{\em not\/} dominated exclusively by error
correction. Further, our adder evaluation shows that
quantum carry-lookahead adders (QCLA) beat ripple-carry
adders in ADCR, despite being larger and more complex.
We conclude with what we believe is one of most
accurate estimates of the area and latency required for
1024-bit Shor's factorization: 7659 mm$^2$ for the
smallest circuit and $6 \times 10^8$ seconds for the
fastest circuit.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "CAD; control; ion trap; layout; quantum computing",
}
@Article{Putnam:2009:PPC,
author = "Andrew Putnam and Susan Eggers and Dave Bennett and
Eric Dellinger and Jeff Mason and Henry Styles and
Prasanna Sundararajan and Ralph Wittig",
title = "Performance and power of cache-based reconfigurable
computing",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "395--405",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555804",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many-cache is a memory architecture that efficiently
supports caching in commercially available FPGAs. It
facilitates FPGA programming for high-performance
computing (HPC) developers by providing them with
memory performance that is greater and power
consumption that is less than their current CPU
platforms, but without sacrificing their familiar,
C-based programming environment.\par
Many-cache creates multiple, multi-banked caches on top
of an FPGA's small, independent memories, each
targeting a particular data structure or region of
memory in an application and each customized for the
memory operations that access it. The caches are
automatically generated from C source by the CHiMPS
C-to-FPGA compiler.\par
This paper presents the analyses and optimizations of
the CHiMPS compiler that construct many-cache caches.
An architectural evaluation of CHiMPS-generated FPGAs
demonstrates a performance advantage of 7.8x (geometric
mean) over CPU-only execution of the same source code,
FPGA power usage that is on average 4.1x less, and
consequently performance per watt that is also greater,
by a geometric mean of 21.3x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "C-to-gates; C-to-hardware; caches; co-processor
accelerator; FPGA; many-cache; synthesis compiler",
}
@Article{Firoozshahian:2009:MSD,
author = "Amin Firoozshahian and Alex Solomatnikov and Ofer
Shacham and Zain Asgar and Stephen Richardson and
Christos Kozyrakis and Mark Horowitz",
title = "A memory system design framework: creating smart
memories",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "406--417",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555805",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As CPU cores become building blocks, we see a great
expansion in the types of on-chip memory systems
proposed for CMPs. Unfortunately, designing the cache
and protocol controllers to support these memory
systems is complex, and their concurrency and latency
characteristics significantly affect the performance of
any CMP. To address this problem, this paper presents a
microarchitecture framework for cache and protocol
controllers, which can aid in generating the RTL for
new memory systems. The framework consists of three
pipelined engines---request-tracking,
state-manipulation, and data movement---which are
programmed to implement a higher-level memory model.
This approach simplifies the design and verification of
CMP systems by decomposing the memory model into
sequences of state and data manipulations. Moreover,
implementing the framework itself produces a
polymorphic memory system.\par
To validate the approach, we implemented a scalable,
flexible CMP in silicon. The memory system was then
programmed to support three disparate memory
models---cache coherent shared memory, streams and
transactional memory. Measured overheads of this
approach seem promising. Our system generates
controllers with
performance overheads of less than 20\% compared to an
ideal controller with zero internal latency. Even the
overhead of directly implementing a fully programmable
controller was modest. While it did double the
controller's area, the amortized effective area in the
system grew by roughly 7\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cache coherence; memory access protocol; memory
systems; multi-core processors; protocol controller;
reconfigurable architecture; stream programming;
transactional memory",
}
@Article{Joao:2009:FRC,
  author          = {Jos{\'e} A. Joao and Onur Mutlu and Yale N. Patt},
  title           = {Flexible reference-counting-based hardware
                     acceleration for garbage collection},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {3},
  pages           = {418--428},
  month           = jun,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1555754.1555806},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Aug 11 18:12:55 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Languages featuring automatic memory management
                     (garbage collection) are increasingly used to write all
                     kinds of applications because they provide clear
                     software engineering and security advantages.
                     Unfortunately, garbage collection imposes a toll on
                     performance and introduces pause times, making such
                     languages less attractive for high-performance or
                     real-time applications. Much progress has been made
                     over the last five decades to reduce the overhead of
                     garbage collection, but it remains significant.\par
                     We propose a cooperative hardware-software technique to
                     reduce the performance overhead of garbage collection.
                     The key idea is to reduce the frequency of garbage
                     collection by efficiently detecting and reusing dead
                     memory space in hardware via hardware-implemented
                     reference counting. Thus, even though software garbage
                     collections are still eventually needed, they become
                     much less frequent and have less impact on overall
                     performance. Our technique is compatible with a variety
                     of software garbage collection algorithms, does not
                     break compatibility with existing software, and reduces
                     garbage collection time by 31\% on average on the Java
                     DaCapo benchmarks running on the production build of
                     the Jikes RVM, which uses a state-of-the-art
                     generational garbage collector.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {garbage collection; reference counting},
}
@Article{Pan:2009:FIF,
author = "Yan Pan and Prabhat Kumar and John Kim and Gokhan
Memik and Yu Zhang and Alok Choudhary",
title = "{Firefly}: illuminating future network-on-chip with
nanophotonics",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "429--440",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555815.1555808",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Future many-core processors will require
high-performance yet energy-efficient on-chip networks
to provide a communication substrate for the increasing
number of cores. Recent advances in silicon
nanophotonics create new opportunities for on-chip
networks. To efficiently exploit the benefits of
nanophotonics, we propose Firefly---a hybrid,
hierarchical network architecture. Firefly consists of
clusters of nodes that are connected using
conventional, electrical signaling while the
inter-cluster communication is done using
nanophotonics---exploiting the benefits of electrical
signaling for
short, local communication while nanophotonics is used
only for global communication to realize an efficient
on-chip network. Crossbar architecture is used for
inter-cluster communication. However, to avoid global
arbitration, the crossbar is partitioned into multiple,
logical crossbars and their arbitration is localized.
Our evaluations show that Firefly improves the
performance by up to 57\% compared to an all-electrical
concentrated mesh (CMESH) topology on adversarial
traffic patterns and up to 54\% compared to an
all-optical crossbar (OP XBAR) on traffic patterns with
locality. If the energy-delay-product is compared,
Firefly improves the efficiency of the on-chip network
by up to 51\% and 38\% compared to CMESH and OP XBAR,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "hierarchical network; interconnection networks;
nanophotonics; topology",
}
@Article{Cianchetti:2009:PRT,
author = "Mark J. Cianchetti and Joseph C. Kerekes and David H.
Albonesi",
title = "{Phastlane}: a rapid transit optical routing network",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "441--450",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555809",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Tens and eventually hundreds of processing cores are
projected to be integrated onto future microprocessors,
making the global interconnect a key component to
achieving scalable chip performance within a given
power envelope. While CMOS-compatible nanophotonics has
emerged as a leading candidate for replacing global
wires beyond the 22nm timeframe, on-chip optical
interconnect architectures proposed thus far are either
limited in scalability or are dependent on
comparatively slow electrical control networks.\par
In this paper, we present Phastlane, a hybrid
electrical/optical routing network for future large
scale, cache coherent multicore microprocessors. The
heart of the Phastlane network is a low-latency optical
crossbar that uses simple predecoded source routing to
transmit cache-line-sized packets several hops in a
single clock cycle under contentionless conditions.
When contention exists, the router makes use of
electrical buffers and, if necessary, a high speed drop
signaling network. Overall, Phastlane achieves 2X better
network performance than a state-of-the-art electrical
baseline while consuming 80\% less network power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "interconnection networks; multicore; nanophotonics;
optical interconnects",
}
@Article{Abts:2009:APP,
  author          = {Dennis Abts and Natalie D. Enright Jerger and John Kim
                     and Dan Gibson and Mikko H. Lipasti},
  title           = {Achieving predictable performance through better
                     memory controller placement in many-core {CMPs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {3},
  pages           = {451--461},
  month           = jun,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1555754.1555810},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Aug 11 18:12:55 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In the near term, Moore's law will continue to provide
                     an increasing number of transistors and therefore an
                     increasing number of on-chip cores. Limited pin
                     bandwidth prevents the integration of a large number of
                     memory controllers on-chip. With many cores, and few
                     memory controllers, where to locate the memory
                     controllers in the on-chip interconnection fabric
                     becomes an important and as yet unexplored question. In
                     this paper we show how the location of the memory
                     controllers can reduce contention (hot spots) in the
                     on-chip fabric and lower the variance in reference
                     latency. This in turn provides predictable performance
                     for memory-intensive applications regardless of the
                     processing core on which a thread is scheduled. We
                     explore the design space of on-chip fabrics to find
                     optimal memory controller placement relative to
                     different topologies (i.e. mesh and torus), routing
                     algorithms, and workloads.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {chip multiprocessors; interconnection networks; memory
                     controllers; routing algorithms},
}
@Article{Luo:2009:DPT,
author = "Yangchun Luo and Venkatesan Packirisamy and Wei-Chung
Hsu and Antonia Zhai and Nikhil Mungre and Ankit
Tarkas",
title = "Dynamic performance tuning for speculative threads",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "3",
pages = "462--473",
month = jun,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1555754.1555812",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Aug 11 18:12:55 MDT 2009",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In response to the emergence of multicore processors,
various novel and sophisticated execution models have
been introduced to fully utilize these processors. One
such execution model is Thread-Level Speculation (TLS),
which allows potentially dependent threads to execute
speculatively in parallel. While TLS offers significant
performance potential for applications that are
otherwise non-parallel, extracting efficient
speculative threads in the presence of complex control
flow and ambiguous data dependences is a real
challenge. This task is further complicated by the fact
that the performance of speculative threads is often
architecture-dependent, input-sensitive, and exhibits
phase behaviors. Thus we propose dynamic performance
tuning mechanisms that determine where and how to
create speculative threads at runtime.\par
This paper describes the design, implementation, and
evaluation of hardware and software support that takes
advantage of runtime performance profiles to extract
efficient speculative threads. In our proposed
framework, speculative threads are monitored by
hardware-based performance counters and their
performance impact is estimated. The creation of
speculative threads is adjusted based on the
estimation. This paper proposes speculative threads
performance estimation techniques, that are capable of
correctly determining whether speculation can improve
performance for loops that corresponds to 83.8\% of
total loop execution time across all benchmarks. This
paper also examines several dynamic performance tuning
policies and finds that the best tuning policy achieves
an overall speedup of 36.8\% on a set of benchmarks from
SPEC2000 suite, which outperforms static thread
management by 9.5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic optimization; multicore; parallelism;
thread-level speculation",
}
@Article{Madriles:2009:BST,
  author          = {Carlos Madriles and Pedro L{\'o}pez and Josep M.
                     Codina and Enric Gibert and Fernando Latorre and
                     Alejandro Martinez and Ra{\'u}l Martinez and Antonio
                     Gonzalez},
  title           = {Boosting single-thread performance in multi-core
                     systems through fine-grain multi-threading},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {3},
  pages           = {474--483},
  month           = jun,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1555754.1555813},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Aug 11 18:12:55 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Industry has shifted towards multi-core designs as we
                     have hit the memory and power walls. However, single
                     thread performance remains of paramount importance
                     since some applications have limited thread-level
                     parallelism (TLP), and even a small part with limited
                     TLP impose important constraints to the global
                     performance, as explained by Amdahl's law.\par
                     In this paper we propose a novel approach for
                     leveraging multiple cores to improve single-thread
                     performance in a multi-core design. The proposed
                     technique features a set of novel hardware mechanisms
                     that support the execution of threads generated at
                     compile time. These threads result from a fine-grain
                     speculative decomposition of the original application
                     and they are executed under a modified multi-core
                     system that includes: (1) mechanisms to support
                     multiple versions; (2) mechanisms to detect violations
                     among threads; (3) mechanisms to reconstruct the
                     original sequential order; and (4) mechanisms to
                     checkpoint the architectural state and recovery to
                     handle misspeculations.\par
                     The proposed scheme outperforms previous hardware-only
                     schemes to implement the idea of combining cores for
                     executing single-thread applications in a multi-core
                     design by more than 10\% on average on Spec2006 for all
                     configurations. Moreover, single-thread performance is
                     improved by 41\% on average when the proposed scheme is
                     used on a Tiny Core, and up to 2.6x for some selected
                     applications.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {automatic parallelization; core-fusion; multicore;
                     single-thread performance; speculative multithreading;
                     thread-level parallelism},
}
@Article{Chaudhry:2009:SST,
  author          = {Shailender Chaudhry and Robert Cypher and Magnus Ekman
                     and Martin Karlsson and Anders Landin and Sherman Yip
                     and H{\aa}kan Zeffer and Marc Tremblay},
  title           = {Simultaneous speculative threading: a novel pipeline
                     architecture implemented in {Sun}'s {Rock} processor},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {3},
  pages           = {484--495},
  month           = jun,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1555815.1555814},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Aug 11 18:12:55 MDT 2009},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper presents Simultaneous Speculative Threading
                     (SST), which is a technique for creating
                     high-performance area- and power-efficient cores for
                     chip multiprocessors. SST hardware dynamically extracts
                     two threads of execution from a single sequential
                     program (one consisting of a load miss and its
                     dependents, and the other consisting of the
                     instructions that are independent of the load miss) and
                     executes them in parallel. SST uses an efficient
                     checkpointing mechanism to eliminate the need for
                     complex and power-inefficient structures such as
                     register renaming logic, reorder buffers, memory
                     disambiguation buffers, and large issue windows.
                     Simulations of certain SST implementations show 18\%
                     better per-thread performance on commercial benchmarks
                     than larger and higher-powered out-of-order cores. Sun
                     Microsystems' ROCK processor, which is the first
                     processor to use SST cores, has been implemented and is
                     scheduled to be commercially available in 2009.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {checkpoint-based architecture; chip multiprocessor;
                     CMP; hardware speculation; instruction-level
                     parallelism; memory-level parallelism; processor
                     architecture; SST},
}
@Article{Thomasian:2009:PSS,
  author          = {Alexander Thomasian},
  title           = {Publications on storage and systems research},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {4},
  pages           = {1--26},
  month           = sep,
  year            = {2009},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Mar 15 19:03:39 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Musoll:2009:MBM,
  author          = {Enric Musoll},
  title           = {Mesh-based many-core performance under process
                     variations: a core yield perspective},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {4},
  pages           = {27--34},
  month           = sep,
  year            = {2009},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Mar 15 19:03:39 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Nikolov:2009:QTM,
  author          = {Angel V. Nikolov},
  title           = {Queuing theoretic model for a multiprocessor with
                     private caches and shared memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {4},
  pages           = {35--44},
  month           = sep,
  year            = {2009},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Mar 15 19:03:39 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2009:INb,
  author          = {Mark Thorson},
  title           = {{Internet} nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {4},
  pages           = {45--51},
  month           = sep,
  year            = {2009},
  CODEN           = {CANED2},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Mar 15 19:03:39 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Musoll:2009:LSO,
  author          = {Enric Musoll},
  title           = {Leakage-saving opportunities in mesh-based massive
                     multi-core architectures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {37},
  number          = {5},
  pages           = {1--7},
  month           = dec,
  year            = {2009},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1755235.1755237},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Apr 8 18:42:25 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {When processing multi-threaded workloads requiring
                     significant inter-thread communication, opportunities
                     to reduce power consumption arise due to the large
                     latencies in obtaining data from the threads running on
                     remote cores and the lack of architectural resources
                     implemented in the simple cores to cover these
                     latencies.\par
                     In this work we propose to use the drowsy mode
                     technique to save leakage power on the cores and
                     leverage the mesh-based communication fabric to hide
                     the wake-up latency of the core blocks. We have
                     observed a potential for reducing the overall power of
                     around 70\% in a generic homogeneous 256-core
                     tile-based multi-core architecture.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Naeem:2009:SRC,
author = "Abdul Naeem and Xiaowen Chen and Zhonghai Lu and Axel
Jantsch",
title = "Scalability of relaxed consistency models in {NoC}
based multicore architectures",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "5",
pages = "8--15",
month = dec,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1755235.1755238",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Apr 8 18:42:25 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper studies realization of relaxed memory
consistency models in the network-on-chip based
distributed shared memory (DSM) multi-core systems.
Within DSM systems, memory consistency is a critical
issue since it affects not only the performance but
also the correctness of programs. We investigate the
scalability of the relaxed consistency models (weak,
release consistency) implemented by using transaction
counters. Our experimental results compare the average
and maximum code, synchronization and data latencies of
the two consistency models for various network sizes
with regular mesh topologies. The observed latencies
rise for both the consistency models as the network
size grows. However, the scaling behaviors are
different. With the release consistency model these
latencies grow significantly slower than with the weak
consistency due to better optimization potential by
means of overlapping, reordering and program order
relaxations. The release consistency improves the
performance by 15.6\% and 26.5\% on average in the code
and consistency latencies over the weak consistency
model for the specific application, as the system grows
from single core to 64 cores. The latency of data
transactions grows 2.2 times faster on the average with
a weak consistency model than with a release
consistency model when the system scales from single
core to 64 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "distributed shared memory; memory consistency;
scalability; synchronization",
}
@Article{Sharma:2009:RPL,
author = "Sandeep Sharma and K. S. Kahlon and P. K. Bansal",
title = "Reliability and path length analysis of irregular
fault tolerant multistage interconnection network",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "5",
pages = "16--23",
month = dec,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1755235.1755239",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Apr 8 18:42:25 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper reliability and path length analysis of
irregular Multistage Interconnection Networks have been
presented. We have examined FT (Four Tree) [8], MFT
(Modified Four Tree) [2], NFT (New Four Tree) [4], IFT
(Improved Four Tree) [5], IASN (Irregular Augmented
Shuffle) [14] and IIASN (Improved Irregular Augmented
Shuffle) [3] networks in which the number of switches
in each stage are different in numbers and also have
express links [11]. Using upper and lower bounds
[7][13][15] for larger networks, the reliability [9] in
terms of mean time to failure of all
these networks are evaluated and compared with each
other. Each source is connected to destination with one
or multiple paths with varying path lengths in a
network. The path length analysis of all these networks
is also analyzed in this paper. A path length [8]
algorithm for IIASN network is also proposed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "four tree network; IIASN; multistage interconnection
network; network reliability; NFT; path length; upper
bound reliability",
}
@Article{Thorson:2009:INc,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "37",
number = "5",
pages = "24--30",
month = dec,
year = "2009",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1755235.1755241",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Apr 8 18:42:25 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brewer:2010:TDR,
author = "Eric A. Brewer",
title = "Technology for developing regions: {Moore's Law} is
not enough",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "1--2",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ipek:2010:DRM,
author = "Engin Ipek and Jeremy Condit and Edmund B. Nightingale
and Doug Burger and Thomas Moscibroda",
title = "Dynamically replicated memory: building reliable
systems from nanoscale resistive memories",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "3--14",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kirman:2010:PEA,
author = "Nevin Kirman and Jos{\'e} F. Mart{\'\i}nez",
title = "A power-efficient all-optical on-chip interconnect
using wavelength-based oblivious routing",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "15--28",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Neelakantam:2010:RSE,
author = "Naveen Neelakantam and David R. Ditzel and Craig
Zilles",
title = "A real system evaluation of hardware atomicity for
software speculation",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "29--38",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Harris:2010:DFM,
author = "Tim Harris and Sasa Tomic and Adri{\'a}n Cristal and
Osman Unsal",
title = "Dynamic filtering: multi-purpose architecture support
for language runtime systems",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "39--52",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bergan:2010:CCR,
author = "Tom Bergan and Owen Anderson and Joseph Devietti and
Luis Ceze and Dan Grossman",
title = "{CoreDet}: a compiler and runtime system for
deterministic multithreaded execution",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "53--64",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Raman:2010:SPU,
author = "Arun Raman and Hanjun Kim and Thomas R. Mason and
Thomas B. Jablin and David I. August",
title = "Speculative parallelization using software
multi-threaded transactions",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "65--76",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:2010:REO,
author = "Dongyoon Lee and Benjamin Wester and Kaushik
Veeraraghavan and Satish Narayanasamy and Peter M. Chen
and Jason Flinn",
title = "{Respec}: efficient online multiprocessor replay via
speculation and external determinism",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "77--90",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Eyerman:2010:PJS,
author = "Stijn Eyerman and Lieven Eeckhout",
title = "Probabilistic job symbiosis modeling for {SMT}
processor scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "91--102",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shen:2010:RBV,
author = "Kai Shen",
title = "Request behavior variations",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "103--116",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Johnson:2010:DCM,
author = "F. Ryan Johnson and Radu Stoica and Anastasia Ailamaki
and Todd C. Mowry",
title = "Decoupling contention management from scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "117--128",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhuravlev:2010:ASR,
author = "Sergey Zhuravlev and Sergey Blagodurov and Alexandra
Fedorova",
title = "Addressing shared resource contention in multicore
processors via scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "129--142",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yuan:2010:SED,
author = "Ding Yuan and Haohui Mai and Weiwei Xiong and Lin Tan
and Yuanyuan Zhou and Shankar Pasupathy",
title = "{SherLog}: error diagnosis by connecting clues from
run-time logs",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "143--154",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weeratunge:2010:AMD,
author = "Dasarath Weeratunge and Xiangyu Zhang and Suresh
Jagannathan",
title = "Analyzing multicore dumps to facilitate concurrency
bug reproduction",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "155--166",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burckhardt:2010:RSP,
author = "Sebastian Burckhardt and Pravesh Kothari and Madanlal
Musuvathi and Santosh Nagarakatte",
title = "A randomized scheduler with probabilistic guarantees
of finding bugs",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "167--178",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2010:CDS,
author = "Wei Zhang and Chong Sun and Shan Lu",
title = "{ConMem}: detecting severe concurrency bugs through an
effect-oriented approach",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "179--192",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mesa-Martinez:2010:CPT,
author = "Francisco Javier Mesa-Martinez and Ehsan K. Ardestani
and Jose Renau",
title = "Characterizing processor thermal behavior",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "193--204",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Venkatesh:2010:CCR,
author = "Ganesh Venkatesh and Jack Sampson and Nathan Goulding
and Saturnino Garcia and Vladyslav Bryksin and Jose
Lugo-Martinez and Steven Swanson and Michael Bedford
Taylor",
title = "Conservation cores: reducing the energy of mature
computations",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "205--218",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sudan:2010:MPI,
author = "Kshitij Sudan and Niladrish Chatterjee and David
Nellans and Manu Awasthi and Rajeev Balasubramonian and
Al Davis",
title = "Micro-pages: increasing {DRAM} efficiency with
locality-aware data placement",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "219--230",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pelley:2010:PRD,
author = "Steven Pelley and David Meisner and Pooya Zandevakili
and Thomas F. Wenisch and Jack Underwood",
title = "Power routing: dynamic power provisioning in the data
center",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "231--242",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ahmad:2010:JOI,
author = "Faraz Ahmad and T. N. Vijaykumar",
title = "Joint optimization of idle and cooling power in data
centers while maintaining response time",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "243--256",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Goodstein:2010:BAA,
author = "Michelle L. Goodstein and Evangelos Vlachos and Shimin
Chen and Phillip B. Gibbons and Michael A. Kozuch and
Todd C. Mowry",
title = "Butterfly analysis: adapting dataflow analysis to
dynamic parallel monitoring",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "257--270",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vlachos:2010:PEA,
author = "Evangelos Vlachos and Michelle L. Goodstein and
Michael A. Kozuch and Shimin Chen and Babak Falsafi and
Phillip B. Gibbons and Todd C. Mowry",
title = "{ParaLog}: enabling and accelerating online parallel
monitoring of multithreaded applications",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "271--284",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hormati:2010:MMS,
author = "Amir H. Hormati and Yoonseo Choi and Mark Woh and
Manjunath Kudlur and Rodric Rabbah and Trevor Mudge and
Scott Mahlke",
title = "{MacroSS}: macro-{SIMDization} of streaming
applications",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "285--296",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Woo:2010:CPD,
author = "Dong Hyuk Woo and Hsien-Hsin S. Lee",
title = "{COMPASS}: a programmable data prefetcher using idle
{GPU} shaders",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "297--310",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sanchez:2010:FAS,
author = "Daniel Sanchez and Richard M. Yoo and Christos
Kozyrakis",
title = "Flexible architectural support for fine-grain
scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "311--322",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Romanescu:2010:SDV,
author = "Bogdan F. Romanescu and Alvin R. Lebeck and Daniel J.
Sorin",
title = "Specifying and dynamically verifying address
translation-aware memory consistency",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "323--334",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ebrahimi:2010:FST,
author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and
Yale N. Patt",
title = "Fairness via source throttling: a configurable and
high-performance fairness substrate for multi-core
memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "335--346",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gelado:2010:ADS,
author = "Isaac Gelado and Javier Cabezas and Nacho Navarro and
John E. Stone and Sanjay Patel and Wen-mei W. Hwu",
title = "An asymmetric distributed shared memory model for
heterogeneous parallel systems",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "347--358",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bhattacharjee:2010:ICC,
author = "Abhishek Bhattacharjee and Margaret Martonosi",
title = "Inter-core cooperative {TLB} for chip
multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "359--370",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Huang:2010:OES,
author = "Ruirui Huang and Daniel Y. Deng and G. Edward Suh",
title = "Orthrus: efficient software integrity protection on
multi-cores",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "371--384",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Feng:2010:SPS,
author = "Shuguang Feng and Shantanu Gupta and Amin Ansari and
Scott Mahlke",
title = "Shoestring: probabilistic soft error reliability on
the cheap",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "385--396",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yoon:2010:VFE,
author = "Doe Hyun Yoon and Mattan Erez",
title = "Virtualized and flexible {ECC} for main memory",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "1",
pages = "397--408",
month = mar,
year = "2010",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Wed Mar 17 14:42:04 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:2010:SRI,
author = "Alexander Thomasian",
title = "Storage research in industry and universities",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "2",
pages = "1--48",
month = may,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1823838.1823840",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:38 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We review activities at universities and industrial
research centers in the storage area, but also briefly
mention topics such as processor design, operating
systems, databases, and performance analysis. Our
starting point is the Berkeley RAID proposal and the
associated taxonomy two decades ago. Important research
groups are listed and key researchers are identified.
We pay special attention to faculty/student
relationships, listing PhD theses and articles related
to storage. We also describe innovative storage
products and the companies behind them. This paper
complements the author's ``Publications in Storage and
Systems'', ACM CAN, Sept. 2009.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matthes:2010:RIC,
author = "Wolfgang Matthes",
title = "Resources instead of cores?",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "2",
pages = "49--63",
month = may,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1823838.1823841",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:38 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Mapping conventional applications to multiple cores is
a difficult problem. To provide a general solution, it
is proposed to abandon the very concept of processor
cores and to populate the silicon real estate with less
complex control and operation units, designated as
resources. A hardware-software API is described that
can put into effect a practically unlimited number of
such resources and that allows for completely
describing and exploiting the inherent parallelism of
the application problems. The paper introduces the
principles of operation, discusses problems of
feasibility and outlines the basic philosophy behind
the approach. The proposed principles may lead
to:\par
* Instruction set architectures which can cope with a
transfinite number of hardware resources.\par
* Processor circuits containing resources of
intermediate granularity and appropriately optimized
interconnects.\par
* Considerable reduction of power consumption during
operation at full speed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "computer architecture; inherent parallelism; multicore
processors; parallel computing; power saving",
}
@Article{Thorson:2010:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "2",
pages = "64--67",
month = may,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1823838.1823843",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:38 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dally:2010:MNC,
author = "William J. Dally",
title = "Moving the needle, computer architecture research in
academe and industry",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "1--1",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815963",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The goal of computer architecture research is to move
the needle, that is to affect the future of computing
in a positive way. Publications, prototypes, and
studies are all just different means to this common
end. This talk will address how to move the needle in
academic and industrial settings discussing what works
and what doesn't. Our work is constrained by
applications, technology, and commercial reality. The
architecture funnel starts with many concepts that
proceed through stages of evaluation and refinement. A
relatively few successful concepts make it out the far
side to deployment. Most concepts fail, and good
researchers cut their losses early. The funnel has many
years of latency and good researchers aim for results
that are relevant beyond this latency. Academics are
best at the early stages of the concept funnel -- where
their long-term perspective and freedom from
constraints are advantages. Industry excels at the
later stages of the pipeline where resources and
experience are well suited to refining ideas for
deployment. Too often good concepts fall into a chasm
between the two. Good partnerships are needed to bridge
this chasm. This talk will illustrate this
exploration of architecture research with numerous
examples of successes and failures. It will give
recommended best practices for academic and industrial
research. I will close with a glimpse of the future of
architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "research",
}
@Article{Watanabe:2010:WWD,
author = "Yasuko Watanabe and John D. Davis and David A. Wood",
title = "{WiDGET: Wisconsin Decoupled Grid Execution Tiles}",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "2--13",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815965",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The recent paradigm shift to multi-core systems
results in high system throughput within a specified
power budget. However, future systems still require
good single thread performance---no longer the
predominant design priority---to mitigate sequential
bottlenecks and/or to guarantee service-level
agreements. Unfortunately, near saturation in voltage
scaling necessitates a long-term alternative to dynamic
voltage and frequency scaling.\par
We propose an energy-proportional computing
infrastructure, called WiDGET, that decouples thread
context management from a sea of simple execution units
(EUs). WiDGET's decoupled design provides flexibility
to alter resource allocation for a particular
power-performance target while turning off unallocated
resources. In other words, WiDGET enables dynamic
customization of different combinations of small and/or
powerful cores on a single chip, consuming power in
proportion to the delivered performance.\par
Over all SPEC CPU2006 benchmarks, WiDGET provides
average per-thread performance that is 26\% better than
a Xeon-like processor while using 8\% less power.
WiDGET can also scale down to a level comparable to an
Atom-like processor, turning off resources to reduce
average power by 58\%. WiDGET achieves high power
efficiency (BIPS$^3$/W), exceeding Xeon-like and
Atom-like processors by up to 2x and 21x,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "hardware; instruction steering; performance; power
efficiency; power proportional computing",
}
@Article{Gibson:2010:FSC,
author = "Dan Gibson and David A. Wood",
title = "{Forwardflow}: a scalable core for power-constrained
{CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "14--25",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815966",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Chip Multiprocessors (CMPs) are now commodity
hardware, but commoditization of parallel software
remains elusive. In the near term, the current trend of
increased core-per-socket count will continue, despite
a lack of parallel software to exercise the hardware.
Future CMPs must deliver thread-level parallelism when
software provides threads to run, but must also
continue to deliver performance gains for single
threads by exploiting instruction-level parallelism and
memory-level parallelism. However, power limitations
will prevent conventional cores from exploiting both
simultaneously.\par
This work presents the Forwardflow Architecture, which
can scale its execution logic up to run single threads,
or down to run multiple threads in a CMP. Forwardflow
dynamically builds an explicit internal dataflow
representation from a conventional instruction set
architecture, using forward dependence pointers to
guide instruction wakeup, selection, and issue.
Forwardflow's backend is organized into discrete units
that can be individually (de-)activated, allowing each
core's performance to be scaled by system software at
the architectural level.\par
On single threads, Forwardflow core scaling yields a
mean runtime reduction of 21\% for a 37\% increase in
power consumption. For multithreaded workloads, a
Forwardflow-based CMP allows system software to select
the performance point that best matches available
power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessor (cmp); power; scalable core",
}
@Article{Azizi:2010:EPT,
  author          = {Omid Azizi and Aqeel Mahesri and Benjamin C. Lee and
    Sanjay J. Patel and Mark Horowitz},
  title           = {Energy-performance tradeoffs in processor architecture
    and circuit design: a marginal cost analysis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {26--36},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1816038.1815967},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Power consumption has become a major constraint in the
    design of processors today. To optimize a processor for
    energy-efficiency requires an examination of
    energy-performance trade-offs in all aspects of the
    processor design space, including both architectural
    and circuit design choices. In this paper, we apply an
    integrated architecture-circuit optimization framework
    to map out energy-performance trade-offs of several
    different high-level processor architectures. We show
    how the joint architecture-circuit space provides a
    trade-off range of approximately 6.5x in performance
    for 4x energy, and we identify the optimal
    architectures for different design objectives. We then
    show that many of the designs in this space come at
    very high marginal costs. Our results show that, for a
    large range of design objectives, voltage scaling is
    effective in efficiently trading off performance and
    energy, and that the choice of optimal architecture and
    circuits does not change much during voltage scaling.
    Finally, we show that with only two designs---a
    dual-issue in-order design and a dual-issue
    out-of-order design, both properly optimized---a large
    part of the energy-performance trade-off space can be
    covered within 3\% of the optimal energy-efficiency.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {co-optimization; design space exploration; design
    trade-offs; energy efficiency; microarchitecture;
    optimization},
}
@Article{Hameed:2010:USI,
  author          = {Rehan Hameed and Wajahat Qadeer and Megan Wachs and
    Omid Azizi and Alex Solomatnikov and Benjamin C. Lee
    and Stephen Richardson and Christos Kozyrakis and Mark
    Horowitz},
  title           = {Understanding sources of inefficiency in
    general-purpose chips},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {37--47},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1816038.1815968},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Due to their high volume, general-purpose processors,
    and now chip multiprocessors (CMPs), are much more cost
    effective than ASICs, but lag significantly in terms of
    performance and energy efficiency. This paper explores
    the sources of these performance and energy overheads
    in general-purpose processing systems by quantifying
    the overheads of a 720p HD H.264 encoder running on a
    general-purpose CMP system. It then explores methods to
    eliminate these overheads by transforming the CPU into
    a specialized system for H.264 encoding. We evaluate
    the gains from customizations useful to broad classes
    of algorithms, such as SIMD units, as well as those
    specific to particular computation, such as customized
    storage and functional units.\par
    The ASIC is 500x more energy efficient than our
    original four-processor CMP. Broadly applicable
    optimizations improve performance by 10x and energy by
    7x. However, the very low energy costs of actual core
    ops (100s fJ in 90nm) mean that over 90\% of the energy
    used in these solutions is still `overhead'. Achieving
    ASIC-like performance and efficiency requires
    algorithm-specific optimizations. For each
    sub-algorithm of H.264, we create a large, specialized
    functional unit that is capable of executing 100s of
    operations per instruction. This improves performance
    and energy by an additional 25x and the final
    customized CMP matches an ASIC solution's performance
    within 3x of its energy and within comparable area.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {ASIC; chip multiprocessor; customization; energy
    efficiency; h.264; high performance; Tensilica},
}
@Article{Barr:2010:TCS,
  author          = {Thomas W. Barr and Alan L. Cox and Scott Rixner},
  title           = {Translation caching: skip, don't walk (the page
    table)},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {48--59},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815970},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper explores the design space of MMU caches
    that accelerate virtual-to-physical address translation
    in processor architectures, such as x86-64, that use a
    radix tree page table. In particular, these caches
    accelerate the page table walk that occurs after a miss
    in the Translation Lookaside Buffer. This paper shows
    that the most effective MMU caches are translation
    caches, which store partial translations and allow the
    page walk hardware to skip one or more levels of the
    page table.\par
    In recent years, both AMD and Intel processors have
    implemented MMU caches. However, their implementations
    are quite different and represent distinct points in
    the design space. This paper introduces three new MMU
    cache structures that round out the design space and
    directly compares the effectiveness of all five
    organizations. This comparison shows that two of the
    newly introduced structures, both of which are
    translation cache variants, are better than existing
    structures in many situations.\par
    Finally, this paper contributes to the age-old
    discourse concerning the relative effectiveness of
    different page table organizations. Generally speaking,
    earlier studies concluded that organizations based on
    hashing, such as the inverted page table, outperformed
    organizations based upon radix trees for supporting
    large virtual address spaces. However, these studies
    did not take into account the possibility of caching
    page table entries from the higher levels of the radix
    tree. This paper shows that any of the five MMU cache
    structures will reduce radix tree page table DRAM
    accesses far below an inverted page table.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {memory management; page walk caching; TLB},
}
@Article{Jaleel:2010:HPC,
  author          = {Aamer Jaleel and Kevin B. Theobald and Simon C.
    {Steely, Jr.} and Joel Emer},
  title           = {High performance cache replacement using re-reference
    interval prediction {(RRIP)}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {60--71},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1816038.1815971},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Practical cache replacement policies attempt to
    emulate optimal replacement by predicting the
    re-reference interval of a cache block. The commonly
    used LRU replacement policy always predicts a
    near-immediate re-reference interval on cache hits and
    misses. Applications that exhibit a distant
    re-reference interval perform badly under LRU. Such
    applications usually have a working-set larger than the
    cache or have frequent bursts of references to
    non-temporal data (called scans). To improve the
    performance of such workloads, this paper proposes
    cache replacement using Re-reference Interval
    Prediction (RRIP). We propose Static RRIP (SRRIP) that
    is scan-resistant and Dynamic RRIP (DRRIP) that is both
    scan-resistant and thrash-resistant. Both RRIP policies
    require only 2-bits per cache block and easily
    integrate into existing LRU approximations found in
    modern processors. Our evaluations using PC games,
    multimedia, server and SPEC CPU2006 workloads on a
    single-core processor with a 2MB last-level cache (LLC)
    show that both SRRIP and DRRIP outperform LRU
    replacement on the throughput metric by an average of
    4\% and 10\% respectively. Our evaluations with over
    1000 multi-programmed workloads on a 4-core CMP with an
    8MB shared LLC show that SRRIP and DRRIP outperform LRU
    replacement on the throughput metric by an average of
    7\% and 9\% respectively. We also show that RRIP
    outperforms LFU, the state-of-the-art scan-resistant
    replacement algorithm to-date. For the cache
    configurations under study, RRIP requires 2X less
    hardware than LRU and 2.5X less hardware than LFU.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {replacement; scan resistance; shared cache;
    thrashing},
}
@Article{Stuecheli:2010:VWQ,
  author          = {Jeffrey Stuecheli and Dimitris Kaseridis and David
    Daly and Hillery C. Hunter and Lizy K. John},
  title           = {The virtual write queue: coordinating {DRAM} and
    last-level cache policies},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {72--82},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815972},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In computer architecture, caches have primarily been
    viewed as a means to hide memory latency from the CPU.
    Cache policies have focused on anticipating the CPU's
    data needs, and are mostly oblivious to the main
    memory. In this paper, we demonstrate that the era of
    many-core architectures has created new main memory
    bottlenecks, and mandates a new approach: coordination
    of cache policy with main memory characteristics. Using
    the cache for memory optimization purposes, we propose
    a Virtual Write Queue which dramatically expands the
    memory controller's visibility of processor behavior,
    at low implementation overhead. Through memory-centric
    modification of existing policies, such as scheduled
    writebacks, this paper demonstrates that performance
    limiting effects of highly-threaded architectures can
    be overcome. We show that through awareness of the
    physical main memory layout and by focusing on writes,
    both read and write average latency can be shortened,
    memory power reduced, and overall system performance
    improved. Through full-system cycle-accurate
    simulations of SPEC cpu2006, we demonstrate that the
    proposed Virtual Write Queue achieves an average 10.9\%
    system-level throughput improvement on memory-intensive
    workloads, along with an overall reduction of 8.7\% in
    memory power across the whole suite.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {cache-replacement; CMP many-core; DDR DDR2 DDR3; DRAM;
    DRAM-parameters; last-level-cache; memory-scheduling
    writeback; page-mode; write-queue; write-scheduling},
}
@Article{Wilkerson:2010:RCP,
  author          = {Chris Wilkerson and Alaa R. Alameldeen and Zeshan
    Chishti and Wei Wu and Dinesh Somasekhar and Shih-lien
    Lu},
  title           = {Reducing cache power with low-cost, multi-bit
    error-correcting codes},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {83--93},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815973},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Technology advancements have enabled the integration
    of large on-die embedded DRAM (eDRAM) caches. eDRAM is
    significantly denser than traditional SRAMs, but must
    be periodically refreshed to retain data. Like SRAM,
    eDRAM is susceptible to device variations, which play a
    role in determining refresh time for eDRAM cells.
    Refresh power potentially represents a large fraction
    of overall system power, particularly during low-power
    states when the CPU is idle. Future designs need to
    reduce cache power without incurring the high cost of
    flushing cache data when entering low-power
    states.\par
    In this paper, we show the significant impact of
    variations on refresh time and cache power consumption
    for large eDRAM caches. We propose Hi-ECC, a technique
    that incorporates multi-bit error-correcting codes to
    significantly reduce refresh rate. Multi-bit
    error-correcting codes usually have a complex decoder
    design and high storage cost. Hi-ECC avoids the decoder
    complexity by using strong ECC codes to identify and
    disable sections of the cache with multi-bit failures,
    while providing efficient single-bit error correction
    for the common case. Hi-ECC includes additional
    optimizations that allow us to amortize the storage
    cost of the code over large data words, providing the
    benefit of multi-bit correction at same storage cost as
    a single-bit error-correcting (SECDED) code (2\%
    overhead). Our proposal achieves a 93\% reduction in
    refresh power vs. a baseline eDRAM cache without error
    correcting capability, and a 66\% reduction in refresh
    power vs. a system using SECDED codes.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {DRAM; ECC; eDRAM; idle power; idle states; multi-bit
    ECC; refresh power; Vccmin},
}
@Article{Xue:2010:ICF,
  author          = {Jing Xue and Alok Garg and Berkehan Ciftcio{\u{g}}lu
    and Jianyun Hu and Shang Wang and Ioannis Savidis and
    Manish Jain and Rebecca Berman and Peng Liu and Michael
    Huang and Hui Wu and Eby Friedman and Gary Wicks and
    Duncan Moore},
  title           = {An intra-chip free-space optical interconnect},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {94--105},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815975},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Continued device scaling enables microprocessors and
    other systems-on-chip (SoCs) to increase their
    performance, functionality, and hence, complexity.
    Simultaneously, relentless scaling, if uncompensated,
    degrades the performance and signal integrity of
    on-chip metal interconnects. These systems have
    therefore become increasingly communications-limited.
    The communications-centric nature of future high
    performance computing devices demands a fundamental
    change in intra- and inter-chip interconnect
    technologies.\par
    Optical interconnect is a promising long term solution.
    However, while significant progress in optical {\em
    signaling\/} has been made in recent years, {\em
    networking\/} issues for on-chip optical interconnect
    still require much investigation. Taking the underlying
    optical signaling systems as a drop-in replacement for
    conventional electrical signaling while maintaining
    conventional packet-switching architectures is unlikely
    to realize the full potential of optical interconnects.
    In this paper, we propose and study the design of a
    fully distributed interconnect architecture based on
    free-space optics. The architecture leverages a suite
    of newly-developed or emerging devices, circuits, and
    optics technologies. The interconnect avoids packet
    relay altogether, offers an ultra-low transmission
    latency and scalable bandwidth, and provides fresh
    opportunities for coherency substrate designs and
    optimizations.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {3d; free-space optical interconnect; intra-chip},
}
@Article{Das:2010:AEP,
  author          = {Reetuparna Das and Onur Mutlu and Thomas Moscibroda
    and Chita R. Das},
  title           = {{A{\'e}rgia}: exploiting packet latency slack in
    on-chip networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {106--116},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1816038.1815976},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Traditional Network-on-Chips (NoCs) employ simple
    arbitration strategies, such as round-robin or
    oldest-first, to decide which packets should be
    prioritized in the network. This is counter-intuitive
    since different packets can have very different effects
    on system performance due to, e.g., different level of
    memory-level parallelism (MLP) of applications. Certain
    packets may be performance-critical because they cause
    the processor to stall, whereas others may be delayed
    for a number of cycles with no effect on
    application-level performance as their latencies are
    hidden by other outstanding packets' latencies. In this
    paper, we define slack as a key measure that
    characterizes the relative importance of a packet.
    Specifically, the slack of a packet is the number of
    cycles the packet can be delayed in the network with no
    effect on execution time. This paper proposes new
    router prioritization policies that exploit the
    available slack of interfering packets in order to
    accelerate performance-critical packets and thus
    improve overall system performance. When two packets
    interfere with each other in a router, the packet with
    the lower slack value is prioritized. We describe
    mechanisms to estimate slack, prevent starvation, and
    combine slack-based prioritization with other recently
    proposed application-aware prioritization
    mechanisms.\par
    We evaluate slack-based prioritization policies on a
    64-core CMP with an 8x8 mesh NoC using a suite of 35
    diverse applications. For a representative set of case
    studies, our proposed policy increases average system
    throughput by 21.0\% over the commonly-used round-robin
    policy. Averaged over 56 randomly-generated
    multiprogrammed workload mixes, the proposed policy
    improves system throughput by 10.3\%, while also
    reducing application-level unfairness by 30.8\%.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {arbitration; memory systems; multi-core; on-chip
    networks; packet scheduling; prioritization},
}
@Article{Koka:2010:SPN,
  author          = {Pranay Koka and Michael O. McCracken and Herb
    Schwetman and Xuezhe Zheng and Ron Ho and Ashok V.
    Krishnamoorthy},
  title           = {Silicon-photonic network architectures for scalable,
    power-efficient multi-chip systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {117--128},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815977},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Scaling trends of logic, memories, and interconnect
    networks lead towards dense many-core chips.
    Unfortunately, process yields and reticle sizes limit
    the scalability of large single-chip systems.
    Multi-chip systems break free of these areal limits,
    but in turn require enormous chip-to-chip bandwidth.
    The `macrochip' concept presented here integrates
    multiple many-core processor chips in a single package
    with silicon-photonic interconnects. This design
    enables a multi-chip system to approach the performance
    of a single large die.\par
    In this paper we propose three silicon-photonic network
    designs that provide low-power, high-bandwidth
    inter-die communication: a static wavelength-routed
    point-to-point network, a `two-phase' arbitrated
    network, and a limited-connectivity point-to-point
    network. We also adapt two existing intra-chip
    silicon-photonic interconnects: a token-ring-based
    crossbar and a circuit-switched torus.\par
    We simulate a 64-die, 512-core cache-coherent macrochip
    using all of the above networks with synthetic kernels,
    and kernels from Splash-2 and PARSEC. We evaluate the
    networks on performance, optical power and complexity.
    Despite a narrow data-path width compared to the
    token-ring or torus, the point-to-point performs 3.3x
    and 3.9x better respectively. We show that the
    point-to-point is over 10x more power-efficient than
    the other networks. We also show that, contrary to
    electronic network designs, a point-to-point network
    has the lowest design complexity for an inter-chip
    silicon-photonic network.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {interconnection networks; nanophotonics},
}
@Article{Beamer:2010:RAD,
  author          = {Scott Beamer and Chen Sun and Yong-Jin Kwon and Ajay
    Joshi and Christopher Batten and Vladimir
    Stojanovi{\'c} and Krste Asanovi{\'c}},
  title           = {Re-architecting {DRAM} memory systems with
    monolithically integrated silicon photonics},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {129--140},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815978},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The performance of future manycore processors will
    only scale with the number of integrated cores if there
    is a corresponding increase in memory bandwidth.
    Projected scaling of electrical DRAM architectures
    appears unlikely to suffice, being constrained by
    processor and DRAM pin-bandwidth density and by total
    DRAM chip power, including off-chip signaling,
    cross-chip interconnect, and bank access energy. In
    this work, we redesign the DRAM main memory system
    using a proposed monolithically integrated silicon
    photonics technology and show that our photonically
    interconnected DRAM (PIDRAM) provides a promising
    solution to all of these issues. Photonics can provide
    high aggregate pin-bandwidth density through dense
    wavelength-division multiplexing. Photonic signaling
    provides energy-efficient communication, which we
    exploit to not only reduce chip-to-chip interconnect
    power but to also reduce cross-chip interconnect power
    by extending the photonic links deep into the actual
    PIDRAM chips. To complement these large improvements in
    interconnect bandwidth and power, we decrease the
    number of bits activated per bank to improve the energy
    efficiency of the PIDRAM banks themselves. Our most
    promising design point yields approximately a 10x power
    reduction for a single-chip PIDRAM channel with similar
    throughput and area as a projected future
    electrical-only DRAM. Finally, we propose optical power
    guiding as a new technique that allows a single PIDRAM
    chip design to be used efficiently in several
    multi-chip configurations that provide either increased
    aggregate capacity or bandwidth.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {dram architecture; energy-efficiency; silicon
    photonics},
}
@Article{Schechter:2010:UEE,
  author          = {Stuart Schechter and Gabriel H. Loh and Karin Strauss
    and Doug Burger},
  title           = {Use {ECP}, not {ECC}, for hard failures in resistive
    memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {141--152},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1816038.1815980},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {As leakage and other charge storage limitations begin
    to impair the scalability of DRAM, non-volatile
    resistive memories are being developed as a potential
    replacement. Unfortunately, current error correction
    techniques are poorly suited to this emerging class of
    memory technologies. Unlike DRAM, PCM and other
    resistive memories have wear lifetimes, measured in
    writes, that are sufficiently short to make cell
    failures common during a system's lifetime. However,
    resistive memories are much less susceptible to
    transient faults than DRAM. The Hamming-based ECC codes
    used in DRAM are designed to handle transient faults
    with no effective lifetime limits, but ECC codes
    applied to resistive memories would wear out faster
    than the cells they are designed to repair. This paper
    evaluates {\em Error-Correcting Pointers\/} (ECP), a
    new approach to error correction optimized for memories
    in which errors are the result of permanent cell
    failures that occur, and are immediately detectable, at
    write time. ECP corrects errors by permanently encoding
    the locations of failed cells into a table and
    assigning cells to replace them. ECP provides longer
    lifetimes than previously proposed solutions with
    equivalent overhead. What's more, as the level of
    variance in cell lifetimes increases---a likely
    consequence of further scaling---ECP's margin of
    improvement over existing schemes increases.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {error correction; hard failures; memory; phase change
    memory; resistive memories},
}
@Article{Qureshi:2010:MMS,
  author          = {Moinuddin K. Qureshi and Michele M. Franceschini and
    Luis A. Lastras-Monta{\~n}o and John P. Karidis},
  title           = {Morphable memory system: a robust architecture for
    exploiting multi-level phase change memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {153--162},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815981},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Phase Change Memory (PCM) is emerging as a scalable
    and power efficient technology to architect future main
    memory systems. The scalability of PCM is enhanced by
    the property that PCM devices can store multiple bits
    per cell. While such Multi-Level Cell (MLC) devices can
    offer high density, this benefit comes at the expense
    of increased read latency, which can cause significant
    performance degradation. This paper proposes {\em
    Morphable Memory System (MMS)}, a robust architecture
    for efficiently incorporating MLC PCM devices in main
    memory. MMS is based on observation that memory
    requirement varies between workloads, and systems are
    typically over-provisioned in terms of memory capacity.
    So, during a phase of low memory usage, some of the MLC
    devices can be operated at fewer bits per cell to
    obtain lower latency. When the workload requires full
    memory capacity, these devices can be restored to high
    density MLC operation to have full main-memory
    capacity. We provide the runtime monitors, the
    hardware-OS interface, and the detailed mechanism for
    implementing MMS. Our evaluations on an 8-core 8GB MLC
    PCM-based system show that MMS provides, on average,
    low latency access for 95\% of all memory requests,
    thereby improving overall system performance by 40\%.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {morphable memory; multi-level cell; phase change
    memory},
}
@Article{Pritchett:2010:SHS,
  author          = {Timothy Pritchett and Mithuna Thottethodi},
  title           = {{SieveStore}: a highly-selective, ensemble-level disk
    cache for cost-performance},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {163--174},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1815982},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Emerging solid-state storage media can significantly
    improve storage performance and energy. However, the
    high cost-per-byte of solid-state media has hindered
    wide-spread adoption in servers. This paper proposes a
    new, cost-effective architecture---SieveStore---which
    enables the use of solid-state media to significantly
    filter access to storage ensembles. Our paper makes
    three key contributions. First, we make a case for
    highly-selective, storage-ensemble-level disk-block
    caching based on the highly-skewed block popularity
    distribution and based on the dynamic nature of the
    popular block set. Second, we identify the problem of
    {\em allocation-writes\/} and show that selective cache
    allocation to reduce allocation-writes---{\em
    sieving\/}---is fundamental to enable efficient
    ensemble-level disk-caching. Third, we propose two
    practical variants of SieveStore. Based on week-long
    block access traces from a storage ensemble of 13
    servers, we find that the two components (sieving and
    ensemble-level caching) each contribute to SieveStore's
    cost-effectiveness. Compared to unsieved,
    ensemble-level disk-caches, SieveStore achieves
    significantly higher hit ratios (35\%--50\% more, on
    average) while using only 1/7$^{th}$ the number of SSD
    drives. Further, ensemble-level caching is strictly
    better in cost-performance compared to per-server
    caching.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {disk cache; flash memory; selective allocation; solid
    state disks; storage; storage ensembles},
}
@Article{Udipi:2010:RDD,
author = "Aniruddha N. Udipi and Naveen Muralimanohar and
Niladrish Chatterjee and Rajeev Balasubramonian and Al
Davis and Norman P. Jouppi",
title = "Rethinking {DRAM} design and organization for
energy-constrained multi-cores",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "175--186",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815983",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "DRAM vendors have traditionally optimized the
cost-per-bit metric, often making design decisions that
incur energy penalties. A prime example is the
overfetch feature in DRAM, where a single request
activates thousands of bit-lines in many DRAM chips,
only to return a single cache line to the CPU. The
focus on cost-per-bit is questionable in modern-day
servers where operating costs can easily exceed the
purchase cost. Modern technology trends are also
placing very different demands on the memory system:
(i)queuing delays are a significant component of memory
access time, (ii) there is a high energy premium for
the level of reliability expected for business-critical
computing, and (iii) the memory access stream emerging
from multi-core systems exhibits limited locality. All
of these trends necessitate an overhaul of DRAM
architecture, even if it means a slight compromise in
the cost-per-bit metric.\par
This paper examines three primary innovations. The
first is a modification to DRAM chip microarchitecture
that retains the traditional DDRx SDRAM interface.
Selective Bit-line Activation (SBA) waits for both RAS
(row address) and CAS (column address) signals to
arrive before activating exactly those bitlines that
provide the requested cache line. SBA reduces energy
consumption while incurring slight area and performance
penalties. The second innovation, Single Subarray
Access (SSA), fundamentally re-organizes the layout of
DRAM arrays and the mapping of data to these arrays so
that an entire cache line is fetched from a single
subarray. It requires a different interface to the
memory controller, reduces dynamic and background
energy (by about 6X), incurs a slight area penalty
(4\%), and can even lead to performance improvements
(54\% on average) by reducing queuing delays. The third
innovation further penalizes the cost-per-bit metric by
adding a checksum feature to each cache line. This
checksum error-detection feature can then be used to
build stronger RAID-like fault tolerance, including
chipkill-level reliability. Such a technique is
especially crucial for the SSA architecture where the
entire cache line is localized to a single chip. This
DRAM chip microarchitectural change leads to a dramatic
reduction in the energy and storage overheads for
reliability. The proposed architectures will also apply
to other emerging memory technologies (such as
resistive memories) and will be less disruptive to
standards, interfaces, and the design flow if they can
be incorporated into first-generation designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chipkill; dram architecture; energy-efficiency;
locality; subarrays",
}
@Article{Chen:2010:LPP,
author = "Yunji Chen and Weiwu Hu and Tianshi Chen and Ruiyang
Wu",
title = "{LReplay}: a pending period based deterministic replay
scheme",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "187--197",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815985",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Debugging parallel program is a well-known difficult
problem. A promising method to facilitate debugging
parallel program is using hardware support to achieve
deterministic replay. A hardware-assisted deterministic
replay scheme should have a small log size, as well as
low design cost, to be feasible for adopting by
industrial processors. To achieve the goals, we propose
a novel and succinct hardware-assisted deterministic
replay scheme named LReplay. The key innovation of
LReplay is that instead of recording the logical time
orders between instructions or instruction blocks as
previous investigations, LReplay is built upon
recording the pending period information [6]. According
to the experimental results on Godson-3, the overall
log size of LReplay is about 0.55B/K-Inst (byte per
k-instruction) for sequential consistency, and
0.85B/K-Inst for Godson-3 consistency. The log size is
smaller in an order of magnitude than state-of-art
deterministic replay schemes incurring no performance
loss. Furthermore, LReplay only consumes about $ 1.3 \%
$ area of Godson-3, since it requires only trivial
modifications to the existing components of Godson-3.
The above features of LReplay demonstrate the potential
of integrating hardware-assisted deterministic replay
into future industrial processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "deterministic replay; DFD; global clock; multi-core
processor; pending period; physical time order",
}
@Article{Voskuilen:2010:TEA,
author = "Gwendolyn Voskuilen and Faraz Ahmad and T. N.
Vijaykumar",
title = "{Timetraveler}: exploiting acyclic races for
optimizing memory race recording",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "198--209",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815986",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As chip multiprocessors emerge as the prevalent
microprocessor architecture, support for debugging
shared-memory parallel programs becomes important. A
key difficulty is the programs' nondeterministic
semantics due to which replay runs of a buggy program
may not reproduce the bug. The non-determinism stems
from memory races where accesses from two threads, at
least one of which is a write, go to the same memory
location. Previous hardware schemes for memory race
recording log the predecessor-successor thread ordering
at memory races and enforce the same orderings in the
replay run to achieve deterministic replay. To reduce
the log size, the schemes exploit transitivity in the
orderings to avoid recording redundant orderings. To
reduce the log size further while requiring minimal
hardware, we propose {\em Timetraveler\/} which for the
first time exploits acyclicity of races based on the
key observation that an acyclic race need not be
recorded even if the race is not covered already by
transitivity. Timetraveler employs a novel and elegant
mechanism called {\em post-dating\/} which both ensures
that acyclic races, including those through the L2, are
eventually ordered correctly, and identifies cyclic
races. To address false cycles through the L2,
Timetraveler employs another novel mechanism called
{\em time-delay buffer\/} which delays the advancement
of the L2 banks' timestamps and thereby reduces the
false cycles. Using simulations, we show that
Timetraveler reduces the log size for commercial
workloads by 88\% over the best previous approach while
using only a 696-byte time-delay buffer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "debugging; determinism; race recording; replay",
}
@Article{Lucia:2010:CES,
author = "Brandon Lucia and Luis Ceze and Karin Strauss and Shaz
Qadeer and Hans-J. Boehm",
title = "Conflict exceptions: simplifying concurrent language
semantics with precise hardware exceptions for
data-races",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "210--221",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815987",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We argue in this paper that concurrency errors should
be treated as exceptions, {\em i.e.}, have fail-stop
behavior and precise semantics. We propose an exception
model based on conflict of synchronization free
regions, which precisely detects a broad class of
data-races. We show that our exceptions provide enough
guarantees to simplify high-level programming language
semantics and debugging, but are significantly cheaper
to enforce than traditional data-race detection. To
make the performance cost of enforcement negligible, we
propose architecture support for accurately detecting
and precisely delivering these exceptions. We evaluate
the suitability of our model as well as the behavior of
our architectural mechanisms using the PARSEC benchmark
suite and commercial applications. Our results show
that the exception model largely reflects how
programmers are already writing code and that the main
memory, traffic and performance overheads of the
enforcement mechanisms we propose are very low.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bug detection; data-races; memory consistency models;
multicores; threads",
}
@Article{Lucia:2010:CAS,
author = "Brandon Lucia and Luis Ceze and Karin Strauss",
title = "{ColorSafe}: architectural support for debugging and
dynamically avoiding multi-variable atomicity
violations",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "222--233",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815988",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we propose ColorSafe, an architecture
that detects and dynamically avoids single- and
multi-variable atomicity violation bugs. The key idea
is to group related data into colors and then monitor
access interleavings in the `color space'. This enables
detection of atomicity violations involving any data of
the same color. We leverage support for meta-data to
maintain color information, and signatures to
efficiently keep recent color access histories.
ColorSafe dynamically avoids atomicity violations by
inserting ephemeral transactions that prevent erroneous
interleavings. ColorSafe has two modes of operation:
(1) {\em debugging mode\/} makes detection more
precise, producing fewer false positives and collecting
more information; and, (2) {\em deployment mode\/}
provides robust, efficient dynamic bug avoidance with
less precise detection. This makes ColorSafe useful
throughout the lifetime of programs, not just during
development. Our results show that, in deployment mode,
ColorSafe is able to successfully avoid the majority of
multi-variable atomicity violations in bug kernels, as
well as in large applications (Apache and MySQL). In
debugging mode, ColorSafe detects bugs with few false
positives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "atomicity violations; bug avoidance; concurrency
errors; data coloring; debugging; multi-variable",
}
@Article{Irwin:2010:SCM,
author = "Mary Jane Irwin",
title = "Shared caches in multicores: the good, the bad, and
the ugly",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "234--234",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815990",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As we transition from clock-frequency performance
scaling to performance scaling with multicores, the
pressure on the memory hierarchy is increasing
dramatically. Many different on-chip cache topologies
have been proposed/implemented; effective management of
these shared caches is crucial to multicore
performance.\par
This talk will begin with a description of a cache miss
classification scheme for multicores (compulsory,
inter-core misses, intra-core misses) that gives
insight into the interactions between memory
transactions of the different cores on a chip sharing a
cache. Ways to improve the on-chip cache performance
with architectural enhancements, compiler enhancements,
and runtime system enhancements will then be discussed.
If the application thread mapping and the on-chip
topology is static (i.e., does not change during
runtime), then compiler enhancements that support cache
topology aware code optimization can be used to
significantly improve an application's performance.
Results from such an augmented compiler, where the
topology is exposed to the compiler and where the
compiler also does thread-to-core mapping assignments,
will be presented. If the application thread mapping or
the on-chip topology is dynamic, then other
alternatives exist. For example, a thread scheduler, or
allocator, can make decisions about moving threads to
different cores during runtime in the hopes of
improving overall cache performance. Initial
experiments with the REEact system being developed by
researchers at Penn State--UPittsburgh--UVirginia that
`reacts' to hardware conditions (such as cache miss
rates, hot-spots, etc.) by reallocating threads at
runtime will be outlined. Finally, if the on-chip cache
topology itself is dynamic (i.e., is designed to be
reconfigurable at runtime), large performance benefits
might be obtained. However, both hardware and software
design challenges to realizing such a dynamic system
abound. Some of these challenges will be briefly
discussed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "caches; multicore",
}
@Article{Meng:2010:DWS,
author = "Jiayuan Meng and David Tarjan and Kevin Skadron",
title = "Dynamic warp subdivision for integrated branch and
memory divergence tolerance",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "235--246",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815992",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "SIMD organizations amortize the area and power of
fetch, decode, and issue logic across multiple
processing units in order to maximize throughput for a
given area and power budget. However, throughput is
reduced when a set of threads operating in lockstep (a
warp) are stalled due to long latency memory accesses.
The resulting idle cycles are extremely costly.
Multi-threading can hide latencies by interleaving the
execution of multiple warps, but deep multi-threading
using many warps dramatically increases the cost of the
register files (multi-threading depth $ \times $ SIMD
width), and cache contention can make performance
worse. Instead, intra-warp latency hiding should first
be exploited. This allows threads that are ready but
stalled by SIMD restrictions to use these idle cycles
and reduces the need for multi-threading among warps.
This paper introduces {\em dynamic warp subdivision\/}
(DWS), which allows a single warp to occupy more than
one slot in the scheduler without requiring extra
register file space. Independent scheduling entities
allow divergent branch paths to interleave their
execution, and allow threads that hit to run ahead. The
result is improved latency hiding and memory level
parallelism (MLP). We evaluate the technique on a
coherent cache hierarchy with private L1 caches and a
shared L2 cache. With an area overhead of less than
1\%, experiments with eight data-parallel benchmarks
show our technique improves performance on average by
1.7$ \times $.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "branch divergence; cache; latency hiding; memory
divergence; SIMD; warp",
}
@Article{Chakradhar:2010:DCC,
author = "Srimat Chakradhar and Murugan Sankaradas and Venkata
Jakkula and Srihari Cadambi",
title = "A dynamically configurable coprocessor for
convolutional neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "247--257",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815993",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Convolutional neural networks (CNN) applications range
from recognition and reasoning (such as handwriting
recognition, facial expression recognition and video
surveillance) to intelligent text applications such as
semantic text analysis and natural language processing
applications. Two key observations drive the design of
a new architecture for CNN. First, CNN workloads
exhibit a {\em widely varying mix of three types of
parallelism\/}: parallelism within a convolution
operation, intra-output parallelism where multiple
input sources (features) are combined to create a
single output, and inter-output parallelism where
multiple, independent outputs (features) are computed
simultaneously. Workloads differ significantly across
different CNN applications, and across different layers
of a CNN. Second, the number of processing elements in
an architecture continues to scale (as per Moore's law)
much faster than off-chip memory bandwidth (or
pin-count) of chips. Based on these two observations,
we show that for a given number of processing elements
and off-chip memory bandwidth, a new CNN hardware
architecture that dynamically configures the hardware
on-the-fly to match the specific mix of parallelism in
a given workload gives the best throughput performance.
Our CNN compiler automatically translates high
abstraction network specification into a parallel
microprogram (a sequence of low-level VLIW
instructions) that is mapped, scheduled and executed by
the coprocessor. Compared to a 2.3 GHz quad-core, dual
socket Intel Xeon, 1.35 GHz C870 GPU, and a 200 MHz
FPGA implementation, our 120 MHz dynamically
configurable architecture is 4x to 8x faster. This is
the {\em first CNN architecture to achieve real-time
video stream processing\/} (25 to 30 frames per second)
on a wide range of object detection and recognition
tasks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "convolutional neural networks; dynamic
reconfiguration; parallel computer architecture",
}
@Article{Blundell:2010:RTR,
author = "Colin Blundell and Arun Raghavan and Milo M. K.
Martin",
title = "{RETCON}: transactional repair without replay",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "258--269",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815995",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Over the past decade there has been a surge of
academic and industrial interest in optimistic
concurrency, {\em i.e.\/} the speculative parallel
execution of code regions that have the semantics of
isolation. This work analyzes scalability bottlenecks
of workloads that use optimistic concurrency. We find
that one common bottleneck is updates to auxiliary
program data in otherwise non-conflicting operations,
{\em e.g.\/} reference count updates and hashtable
occupancy field increments.\par
To eliminate the performance impact of conflicts on
such auxiliary data, this work proposes RETCON, a
hardware mechanism that tracks the relationship between
input and output values symbolically and uses this
symbolic information to transparently repair the output
state of a transaction at commit. RETCON is inspired by
instruction replay-based mechanisms but exploits
simplifying properties of the nature of computations on
auxiliary data to perform repair {\em without\/}
replay. Our experiments show that RETCON provides
significant speedups for workloads that exhibit
conflicts on auxiliary data, including transforming a
transactionalized version of the Python interpreter
from a workload that exhibits no scaling to one that
exhibits near-linear scaling on 32 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "parallel programming; transactional memory",
}
@Article{Lee:2010:TTD,
author = "Janghaeng Lee and Haicheng Wu and Madhumitha
Ravichandran and Nathan Clark",
title = "{Thread Tailor}: dynamically weaving threads together
for efficient, adaptive parallel applications",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "270--279",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815996",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Extracting performance from modern parallel
architectures requires that applications be divided
into many different threads of execution. Unfortunately
selecting the appropriate number of threads for an
application is a daunting task. Having too many threads
can quickly saturate shared resources, such as cache
capacity or memory bandwidth, thus degrading
performance. On the other hand, having too few threads
makes inefficient use of the resources available.
Beyond static resource assignment, the program inputs
and dynamic system state (e.g., what other applications
are executing in the system) can have a significant
impact on the right number of threads to use for a
particular application.\par
To address this problem we present the Thread Tailor, a
dynamic system that automatically adjusts the number of
threads in an application to optimize system
efficiency. The Thread Tailor leverages offline
analysis to estimate what type of threads will exist at
runtime and the communication patterns between them.
Using this information Thread Tailor dynamically
combines threads to better suit the needs of the target
system. Thread Tailor adjusts not only to the
architecture, but also other applications in the
system, and this paper demonstrates that this type of
adjustment can lead to significantly better use of
thread-level parallelism in real-world architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic compilation; managed parallelism; threading",
}
@Article{Hong:2010:IGP,
author = "Sunpyo Hong and Hyesoon Kim",
title = "An integrated {GPU} power and performance model",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "280--289",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1815998",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "GPU architectures are increasingly important in the
multi-core era due to their high number of parallel
processors. Performance optimization for multi-core
processors has been a challenge for programmers.
Furthermore, optimizing for power consumption is even
more difficult. Unfortunately, as a result of the high
number of processors, the power consumption of
many-core processors such as GPUs has increased
significantly.\par
Hence, in this paper, we propose an integrated power
and performance (IPP) prediction model for a GPU
architecture to predict the optimal number of active
processors for a given application. The basic intuition
is that when an application reaches the peak memory
bandwidth, using more cores does not result in
performance improvement.\par
We develop an empirical power model for the GPU. Unlike
most previous models, which require measured execution
times, hardware performance counters, or architectural
simulations, IPP predicts execution times to calculate
dynamic power events. We then use the outcome of IPP to
control the number of running cores. We also model the
increases in power consumption that resulted from the
increases in temperature.\par
With the predicted optimal number of active cores, we
show that we can save up to 22.09\% of runtime GPU
energy consumption and on average 10.99\% of that for
the five memory bandwidth-limited benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "analytical model; CUDA; energy; GPU architecture;
performance; power estimation",
}
@Article{Tan:2010:CFF,
author = "Zhangxi Tan and Andrew Waterman and Henry Cook and
Sarah Bird and Krste Asanovi{\'c} and David Patterson",
title = "A case for {FAME}: {FPGA} architecture model
execution",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "290--301",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1815999",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Given the multicore microprocessor revolution, we
argue that the architecture research community needs a
dramatic increase in simulation capacity. We believe
FPGA Architecture Model Execution (FAME) simulators can
increase the number of useful architecture research
experiments per day by two orders of magnitude over
Software Architecture Model Execution (SAME)
simulators. To clear up misconceptions about FPGA-based
simulation methodologies, we propose a FAME taxonomy to
distinguish the cost-performance of variations on these
ideas. We demonstrate our simulation speedup claim with
a case study wherein we employ a prototype FAME
simulator, RAMP Gold, to research the interaction
between hardware partitioning mechanisms and operating
system scheduling policy. The study demonstrates FAME's
capabilities: we run a modern parallel benchmark suite
on a research operating system, simulate 64-core target
architectures with multi-level memory hierarchy timing
models, and add experimental hardware mechanisms to the
target machine. The simulation speedup achieved by our
adoption of FAME---250$ \times $---enables experiments with
more realistic time scales and data set sizes than are
possible with SAME.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "FPGA; microprocessors; simulation",
}
@Article{Blake:2010:ETL,
author = "Geoffrey Blake and Ronald G. Dreslinski and Trevor
Mudge and Kriszti{\'a}n Flautner",
title = "Evolution of thread-level parallelism in desktop
applications",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "302--313",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816000",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As the effective limits of frequency and instruction
level parallelism have been reached, the strategy of
microprocessor vendors has changed to increase the
number of processing cores on a single chip each
generation. The implicit expectation is that software
developers will write their applications with
concurrency in mind to take advantage of this sudden
change in direction. In this study we analyze whether
software developers for laptop/desktop machines have
followed the recent hardware trends by creating
software for chip multi-processing. We conduct a study
of a wide range of applications on Microsoft Windows 7
and Apple's OS X Snow Leopard, measuring {\em Thread
Level Parallelism\/} on a high performance workstation
and a low power desktop. In addition, we explore
graphics processing units (GPUs) and their impact on
chip multi-processing. We compare our findings to a
study done 10 years ago which concluded that a second
core was sufficient to improve system responsiveness.
Our results on today's machines show that, 10 years
later, surprisingly 2--3 cores are more than adequate
for most applications and that the GPU often remains
under-utilized. However, in some application specific
domains an 8 core SMT system with a 240 core GPU can be
effectively utilized. Overall these studies suggest
that many-core architectures are not a natural fit for
current desktop/laptop applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "benchmarking; desktop applications; multi-core; thread
level parallelism",
}
@Article{Reddi:2010:WSU,
author = "Vijay Janapa Reddi and Benjamin C. Lee and Trishul
Chilimbi and Kushagra Vaid",
title = "{Web} search using mobile cores: quantifying and
mitigating the price of efficiency",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "314--325",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816002",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The commoditization of hardware, data center economies
of scale, and Internet-scale workload growth all demand
greater power efficiency to sustain scalability.
Traditional enterprise workloads, which are typically
memory and I/O bound, have been well served by chip
multiprocessors comprised of small, power-efficient
cores. Recent advances in mobile computing have led to
modern small cores capable of delivering even better
power efficiency. While these cores can deliver
performance-per-Watt efficiency for data center
workloads, small cores impact application
quality-of-service robustness, and flexibility, as
these workloads increasingly invoke computationally
intensive kernels. These challenges constitute the
price of efficiency. We quantify efficiency for an
industry-strength online web search engine in
production at both the microarchitecture- and
system-level, evaluating search on server and
mobile-class architectures using Xeon and Atom
processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "bing; energy efficiency; mobile cores; web search",
}
@Article{Soundararajan:2010:IMO,
author = "Vijayaraghavan Soundararajan and Jennifer M.
Anderson",
title = "The impact of management operations on the virtualized
datacenter",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "326--337",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816003",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Virtualization has the potential to dramatically
reduce the total cost of ownership of datacenters and
increase the flexibility of deployments for
general-purpose workloads. If present trends continue,
the datacenter of the future will be largely
virtualized. The base platform in such a datacenter
will consist of physical hosts that run hypervisors,
and workloads will run within virtual machines on these
platforms. From a system management perspective, the
virtualized environment enables a number of new
workflows in the datacenter. These workflows involve
operations on the physical hosts themselves, such as
upgrading the hypervisor, as well as operations on the
virtual machines, such as reconfiguration or reverting
from snapshots. While traditional datacenter design has
focused on the cost vs. capability tradeoffs for the
end-user applications running in the datacenter, we
argue that the management workload from these workflows
must be factored into the design of the virtualized
datacenter.\par
In this paper, we examine data from real-world
virtualized deployments to characterize common
management workflows and assess their impact on
resource usage in the datacenter. We show that while
many end-user applications are fairly light on I/O
requirements, the management workload has considerable
network and disk I/O requirements. We show that the
management workload scales with the increasing compute
power in the datacenter. Finally, we discuss the
implications of this management workload for the
datacenter.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cloud computing; datacenter management; management
workload; virtual machine management",
}
@Article{Abts:2010:EPD,
author = "Dennis Abts and Michael R. Marty and Philip M. Wells
and Peter Klausler and Hong Liu",
title = "Energy proportional datacenter networks",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "338--347",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816004",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Numerous studies have shown that datacenter computers
rarely operate at full utilization, leading to a number
of proposals for creating servers that are {\em energy
proportional\/} with respect to the computation that
they are performing.\par
In this paper, we show that as servers themselves
become more energy proportional, the datacenter network
can become a significant fraction (up to 50\%) of
cluster power. In this paper we propose several ways to
design a high-performance datacenter network whose
power consumption is more proportional to the amount of
traffic it is moving --- that is, we propose {\em energy
proportional datacenter networks}.\par
We first show that a flattened butterfly topology
itself is inherently more power efficient than the
other commonly proposed topology for high-performance
datacenter networks. We then exploit the
characteristics of modern plesiochronous links to
adjust their power and performance envelopes
dynamically. Using a network simulator, driven by both
synthetic workloads and production datacenter traces,
we characterize and understand design tradeoffs, and
demonstrate an 85\% reduction in power --- which
approaches the ideal energy-proportionality of the
network.\par
Our results also demonstrate two challenges for the
designers of future network switches: (1) We show that
there is a significant power advantage to having
independent control of each unidirectional channel
comprising a network link, since many traffic patterns
show very asymmetric use, and (2) system designers
should work to optimize the high-speed channel designs
to be more energy efficient by choosing optimal data
rate and equalization technology. Given these
assumptions, we demonstrate that energy proportional
datacenter communication is indeed possible.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "datacenter networks; interconnection networks;
low-power networking",
}
@Article{Thacker:2010:IFE,
author = "Charles P. Thacker",
title = "Improving the future by examining the past",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "348--348",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816006",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "During the last fifty years, the technology underlying
computer systems has improved dramatically. As
technology has evolved, designers have made a series of
choices in the way it was applied in computers. In some
cases, decisions that were made in the twentieth
century make less sense in the twenty-first.
Conversely, paths not taken might now be more
attractive given the state of technology today,
particularly in light of the limits the field is
facing, such as the increasing gap between processor
speed and storage access times and the difficulty of
cooling today's computers.\par
In this talk, I'll discuss some of these choices and
suggest some possible changes that might make computing
better in the twenty-first century.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Turing Award",
}
@Article{Temam:2010:RNN,
author = "Olivier Temam",
title = "The rebirth of neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "349--349",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816008",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "After the hype of the 1990s, where companies like
Intel or Philips built commercial hardware systems
based on neural networks, the approach quickly lost
ground for multiple reasons: hardware neural networks
were no match for software neural networks run on
rapidly progressing general-purpose processors, their
application scope was considered too limited, and even
progress in machine-learning theory overshadowed neural
networks.\par
However, in the past few years, a remarkable
convergence of trends and innovations is casting a new
light on neural networks and could make them valuable
components of future computing systems. Trends in
technology call for architectures which can sustain a
large number of defects, something neural networks are
intrinsically capable of. Trends in applications,
summarized in the recent RMS categorization, highlight
a number of key algorithms which are eligible to neural
networks implementations. At the same time, innovations
in technology, such as the recent realization of a
memristor, are creating the conditions for the
efficient hardware implementation of neural networks.
Innovations in machine learning, with the recent advent
of Deep Networks, have revived interest in neural
networks. Finally, recent findings in neurobiology
carry even greater prospects, where detailed
explanations of how complex functions, such as vision,
can be implemented further open up the defect-tolerance
and application potential of neural network
architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "neural networks",
}
@Article{Keller:2010:NVC,
author = "Eric Keller and Jakub Szefer and Jennifer Rexford and
Ruby B. Lee",
title = "{NoHype}: virtualized cloud infrastructure without the
virtualization",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "350--361",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816010",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Cloud computing is a disruptive trend that is changing
the way we use computers. The key underlying technology
in cloud infrastructures is virtualization --- so much
so that many consider virtualization to be one of the
key features rather than simply an implementation
detail. Unfortunately, the use of virtualization is the
source of a significant security concern. Because
multiple virtual machines run on the same server and
since the virtualization layer plays a considerable
role in the operation of a virtual machine, a malicious
party has the opportunity to attack the virtualization
layer. A successful attack would give the malicious
party control over the all-powerful virtualization
layer, potentially compromising the confidentiality and
integrity of the software and data of any virtual
machine. In this paper we propose removing the
virtualization layer, while retaining the key features
enabled by virtualization. Our NoHype architecture,
named to indicate the removal of the hypervisor,
addresses each of the key roles of the virtualization
layer: arbitrating access to CPU, memory, and I/O
devices, acting as a network device (e.g., Ethernet
switch), and managing the starting and stopping of
guest virtual machines. Additionally, we show that our
NoHype architecture may indeed be ``no hype'' since
nearly all of the needed features to realize the NoHype
architecture are currently available as hardware
extensions to processors and I/O devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cloud computing; hypervisor; many-core; multi-core;
security; system architecture; virtualization",
}
@Article{Eyerman:2010:MCS,
author = "Stijn Eyerman and Lieven Eeckhout",
title = "Modeling critical sections in {Amdahl's Law} and its
implications for multicore design",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "362--370",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816011",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a fundamental law for parallel
performance: it shows that parallel performance is not
only limited by sequential code (as suggested by
Amdahl's law) but is also fundamentally limited by
synchronization through critical sections. Extending
Amdahl's software model to include critical sections,
we derive the surprising result that the impact of
critical sections on parallel performance can be
modeled as a completely sequential part and a
completely parallel part. The sequential part is
determined by the probability for entering a critical
section and the contention probability (i.e., multiple
threads wanting to enter the same critical section).
This fundamental result reveals at least three
important insights for multicore design. (i) Asymmetric
multicore processors deliver less performance benefits
relative to symmetric processors than suggested by
Amdahl's law, and in some cases even worse performance.
(ii) Amdahl's law suggests many tiny cores for optimum
performance in asymmetric processors, however, we find
that fewer but larger small cores can yield
substantially better performance. (iii) Executing
critical sections on the big core can yield substantial
speedups, however, performance is sensitive to the
accuracy of the critical section contention
predictor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "Amdahl's law; analytical performance modeling;
critical sections; synchronization",
}
@Article{Guo:2010:RCA,
author = "Xiaochen Guo and Engin Ipek and Tolga Soyata",
title = "Resistive computation: avoiding the power wall with
low-leakage, {STT-MRAM} based computing",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "371--382",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816012",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As CMOS scales beyond the 45nm technology node,
leakage concerns are starting to limit microprocessor
performance growth. To keep dynamic power constant
across process generations, traditional MOSFET scaling
theory prescribes reducing supply and threshold
voltages in proportion to device dimensions, a practice
that induces an exponential increase in subthreshold
leakage. As a result, leakage power has become
comparable to dynamic power in current-generation
processes, and will soon exceed it in magnitude if
voltages are scaled down any further. Beyond this
inflection point, multicore processors will not be able
to afford keeping more than a small fraction of all
cores active at any given moment. Multicore scaling
will soon hit a power wall.\par
This paper presents resistive computation, a new
technique that aims at avoiding the power wall by
migrating most of the functionality of a modern
microprocessor from CMOS to spin-torque transfer
magnetoresistive RAM (STT-MRAM)---a CMOS-compatible,
leakage-resistant, non-volatile resistive memory
technology. By implementing much of the on-chip storage
and combinational logic using leakage-resistant,
scalable RAM blocks and lookup tables, and by carefully
re-architecting the pipeline, an STT-MRAM based
implementation of an eight-core Sun Niagara-like CMT
processor reduces chip-wide power dissipation by
$ 1.7 \times $ and leakage power by $ 2.1 \times $ at the 32nm
technology node, while maintaining 93\% of the system
throughput of a CMOS-based design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "power-efficiency; STT-MRAM",
}
@Article{Seong:2010:SRP,
author = "Nak Hee Seong and Dong Hyuk Woo and Hsien-Hsin S.
Lee",
title = "Security refresh: prevent malicious wear-out and
increase durability for phase-change memory with
dynamically randomized address mapping",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "383--394",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816014",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Phase change memory (PCM) is an emerging memory
technology for future computing systems. Compared to
other non-volatile memory alternatives, PCM is more
matured to production, and has a faster read latency
and potentially higher storage density. The main
roadblock precluding PCM from being used, in
particular, in the main memory hierarchy, is its
limited write endurance. To address this issue, recent
studies proposed to either reduce PCM's write frequency
or use wear-leveling to evenly distribute writes.
Although these techniques can extend the lifetime of
PCM, most of them will not prevent deliberately
designed malicious codes from wearing it out quickly.
Furthermore, all the prior techniques did not consider
the circumstances of a compromised OS and its security
implication to the overall PCM design. A compromised OS
will allow adversaries to manipulate processes and
exploit side channels to accelerate wear-out.\par
In this paper, we argue that a PCM design not only has
to consider normal wear-out under normal application
behavior, most importantly, it must take the worst-case
scenario into account with the presence of malicious
exploits and a compromised OS to address the durability
and security issues simultaneously. In this paper, we
propose a novel, low-cost hardware mechanism called
Security Refresh to avoid information leak by
constantly migrating their physical locations inside
the PCM, obfuscating the actual data placement from
users and system software. It uses a dynamic randomized
address mapping scheme that swaps data using random
keys upon each refresh due. The hardware overhead is
tiny without using any table. The best lifetime we can
achieve under the worst-case malicious attack is more
than six years. Also, our scheme incurs around 1\%
performance degradation for normal program
operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "dynamic address remapping; phase change memory;
security; wear leveling",
}
@Article{Huang:2010:ICM,
author = "Ruirui Huang and G. Edward Suh",
title = "{IVEC}: off-chip memory integrity protection for both
security and reliability",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "395--406",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816015",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper proposes a unified off-chip memory
integrity protection scheme, named IVEC. Today, a
system needs two independent mechanisms in order to
protect the memory integrity from both physical attacks
and random errors. Integrity verification schemes
detect malicious tampering of memory while error
correcting codes (ECC) detect and correct random
errors. IVEC enables both detection of malicious
attacks for security and correction of random errors
for reliability at the same time by extending the
integrity verification techniques. Analytical and
experimental studies show that IVEC can correct
single-bit errors and even multi-bit errors from one
DRAM chip within a cache block read without any
additional ECC bits, when the integrity verification is
also required for security, effectively removing the
memory and bandwidth overheads (12.5\%) of typical ECC
schemes. Alternatively, with parity bits, IVEC can
provide even stronger error correction capabilities
comparable to the traditional chip-kill correct, still
with less overheads. For both cases, IVEC can use
standard non-ECC DIMMs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "error correction; error detection; fault tolerance;
memory systems; reliability; security",
}
@Article{Shriraman:2010:SLW,
author = "Arrvindh Shriraman and Sandhya Dwarkadas",
title = "{Sentry}: light-weight auxiliary memory access
control",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "407--418",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816016",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Light-weight, flexible access control, which allows
software to regulate reads and writes to any
granularity of memory region, can help improve the
reliability of today's multi-module multi-programmer
applications, as well as the efficiency of software
debugging tools. Unfortunately, access control in
today's processors is tied to support for virtual
memory, making its use both heavy weight and coarse
grain. In this paper, we propose Sentry, an auxiliary
level of virtual memory tagging that is entirely
subordinate to existing virtual memory-based protection
mechanisms and can be manipulated at the user level. We
implement these tags in a complexity-effective manner
using an M-cache (metadata cache) structure that only
intervenes on L1 misses, thereby minimizing changes to
the processor core. Existing cache coherence states are
repurposed to implicitly validate permissions for L1
hits. Sentry achieves its goal of flexible and
light-weight access control without disrupting existing
inter-application protection, sidestepping the
challenges associated with adding a new protection
framework to an existing operating system.\par
We illustrate the benefits of our design point using
(1) an Apache-based web server that uses the M-cache to
enforce protection boundaries among its modules and (2)
a watchpoint-based tool to demonstrate low-overhead
debugging. Protection is achieved with very few changes
to the source code, no changes to the programming
model, minimal modifications to the operating system,
and with low overhead incurred only when accessing
memory regions for which the additional level of access
control is enabled.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "access control; cache coherence; memory protection;
multiprocessors; protection domains; safety; sentry",
}
@Article{Herrero:2010:ECC,
author = "Enric Herrero and Jos{\'e} Gonz{\'a}lez and Ramon
Canal",
title = "Elastic cooperative caching: an autonomous dynamically
adaptive memory hierarchy for chip multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "419--428",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816018",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Next generation tiled microarchitectures are going to
be limited by off-chip misses and by on-chip network
usage. Furthermore, these platforms will run an
heterogeneous mix of applications with very different
memory needs, leading to significant optimization
opportunities. Existing adaptive memory hierarchies use
either centralized structures that limit the
scalability or software based resource allocation that
increases programming complexity.\par
We propose Elastic Cooperative Caching, a dynamic and
scalable memory hierarchy that adapts automatically and
autonomously to application behavior for each node. Our
configuration uses elastic shared/private caches with
fully autonomous and distributed repartitioning units
for better scalability. Furthermore, we have extended
our elastic configuration with an Adaptive Spilling
mechanism to use the shared cache space only when it
can produce a performance improvement. Elastic caches
allow both the creation of big local private caches for
threads with high reuse of private data and the
creation of big shared spaces from unused caches. Local
data allocation in private regions allows to reduce
network usage and efficient cache partitioning allows
to reduce off-chip misses.\par
The proposed scheme outperforms previous proposals by a
minimum of 12\% (on average across the benchmarks) and
reduces the number of offchip misses by 16\%. Plus, the
dynamic and autonomous management of cache resources
avoids the reallocation of cache blocks without reuse
which results in an increase in energy efficiency of
24\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "chip multiprocessors; elastic cooperative caching;
memory hierarchy; tiled microarchitectures",
}
@Article{Kelm:2010:CHM,
author = "John H. Kelm and Daniel R. Johnson and William Tuohy
and Steven S. Lumetta and Sanjay J. Patel",
title = "{Cohesion}: a hybrid memory model for accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "429--440",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816019",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Two broad classes of memory models are available
today: models with hardware cache coherence, used in
conventional chip multiprocessors, and models that rely
upon software to manage coherence, found in compute
accelerators. In some systems, both types of models are
supported using disjoint address spaces and/or physical
memories. In this paper we present Cohesion, a hybrid
memory model that enables fine-grained temporal
reassignment of data between hardware-managed and
software-managed coherence domains, allowing a system
to support both. Cohesion can be used to dynamically
adapt to the sharing needs of both applications and
runtimes. Cohesion requires neither copy operations nor
multiple address spaces.\par
Cohesion offers the benefits of reduced message traffic
and on-die directory overhead when software-managed
coherence can be used and the advantages of hardware
coherence for cases in which software-managed coherence
is impractical. We demonstrate our protocol using a
hierarchical, cached 1024-core processor with a single
address space that supports both software-enforced
coherence and a directory-based hardware coherence
protocol. Relative to an optimistic, hardware-coherent
baseline, a realizable Cohesion design achieves
competitive performance with a $ 2 \times $ reduction in
message traffic, $ 2.1 \times $ reduction in directory
utilization, and greater robustness to on-die directory
capacity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "accelerator; cache coherence; computer architecture",
}
@Article{Suleman:2010:DMM,
author = "M. Aater Suleman and Onur Mutlu and Jos{\'e} A. Joao
and Khubaib and Yale N. Patt",
title = "Data marshaling for multi-core architectures",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "441--450",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816020",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Previous research has shown that Staged Execution
(SE), i.e., dividing a program into segments and
executing each segment at the core that has the data
and/or functionality to best run that segment, can
improve performance and save power. However, SE's
benefit is limited because most segments access {\em
inter-segment data}, i.e., data generated by the
previous segment. When consecutive segments run on
different cores, accesses to inter-segment data incur
cache misses, thereby reducing performance. This paper
proposes {\em Data Marshaling (DM)}, a new technique to
eliminate cache misses to inter-segment data. DM uses
profiling to identify instructions that generate
inter-segment data, and adds only 96 bytes/core of
storage overhead. We show that DM significantly
improves the performance of two promising Staged
Execution models, Accelerated Critical Sections and
producer-consumer pipeline parallelism, on both
homogeneous and heterogeneous multi-core systems. In
both models, DM can achieve almost all of the potential
of ideally eliminating cache misses to inter-segment
data. DM's performance benefit increases with the
number of cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "cmp; critical sections; pipelining; staged execution",
}
@Article{Lee:2010:DGV,
author = "Victor W. Lee and Changkyu Kim and Jatin Chhugani and
Michael Deisher and Daehyun Kim and Anthony D. Nguyen
and Nadathur Satish and Mikhail Smelyanskiy and
Srinivas Chennupaty and Per Hammarlund and Ronak
Singhal and Pradeep Dubey",
title = "Debunking the {100X} {GPU} vs. {CPU} myth: an
evaluation of throughput computing on {CPU} and {GPU}",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "451--460",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1816038.1816021",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent advances in computing have led to an explosion
in the amount of data being generated. Processing the
ever-growing data in a timely manner has made
throughput computing an important aspect for emerging
applications. Our analysis of a set of important
throughput computing kernels shows that there is an
ample amount of parallelism in these kernels which
makes them suitable for today's multi-core CPUs and
GPUs. In the past few years there have been many
studies claiming GPUs deliver substantial speedups
(between 10X and 1000X) over multi-core CPUs on these
kernels. To understand where such large performance
difference comes from, we perform a rigorous
performance analysis and find that after applying
optimizations appropriate for both CPUs and GPUs the
performance gap between an Nvidia GTX280 processor and
the Intel Core i7-960 processor narrows to only 2.5x on
average. In this paper, we discuss optimization
techniques for both CPU and GPU, analyze what
architecture features contributed to performance
differences between the two architectures, and
recommend a set of architectural features which provide
significant improvement in architectural efficiency for
throughput kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "CPU architecture; GPU architecture; performance
analysis; performance measurement; software
optimization; throughput computing",
}
@Article{Sridharan:2010:UHV,
author = "Vilas Sridharan and David R. Kaeli",
title = "Using hardware vulnerability factors to enhance {AVF}
analysis",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "461--472",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816023",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Fault tolerance is now a primary design constraint for
all major microprocessors. One step in determining a
processor's compliance to its failure rate target is
measuring the Architectural Vulnerability Factor (AVF)
of each on-chip structure. The AVF of a hardware
structure is the probability that a fault in the
structure will affect the output of a program. While
AVF generates meaningful insight into system behavior,
it cannot quantify the vulnerability of an individual
system component (hardware, user program, etc.),
limiting the amount of insight that can be generated.
To address this, prior work has introduced the Program
Vulnerability Factor (PVF) to quantify the
vulnerability of software. In this paper, we introduce
and analyze the Hardware Vulnerability Factor (HVF) to
quantify the vulnerability of hardware.\par
HVF has three concrete benefits which we examine in
this paper. First, HVF analysis can provide insight to
hardware designers beyond that gained from AVF analysis
alone. Second, separating AVF analysis into HVF and PVF
steps can accelerate the AVF measurement process.
Finally, HVF measurement enables runtime AVF estimation
that combines compile-time PVF estimates with runtime
HVF measurements. A key benefit of this technique is
that it allows software developers to influence the
runtime AVF estimates. We demonstrate that this
technique can estimate AVF at runtime with an average
absolute error of less than 3\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "architectural vulnerability factor; fault tolerance;
reliability",
}
@Article{Ansari:2010:NES,
author = "Amin Ansari and Shuguang Feng and Shantanu Gupta and
Scott Mahlke",
title = "{Necromancer}: enhancing system throughput by
animating dead cores",
journal = j-COMP-ARCH-NEWS,
volume = "38",
number = "3",
pages = "473--484",
month = jun,
year = "2010",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1815961.1816024",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 6 14:11:46 MDT 2010",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Aggressive technology scaling into the nanometer
regime has led to a host of reliability challenges in
the last several years. Unlike on-chip caches, which
can be efficiently protected using conventional
schemes, the general core area is less homogeneous and
structured, making tolerating defects a much more
challenging problem. Due to the lack of effective
solutions, disabling non-functional cores is a common
practice in industry to enhance manufacturing yield,
which results in a significant reduction in system
throughput. Although a faulty core cannot be trusted to
correctly execute programs, we observe in this work
that for most defects, when starting from a valid
architectural state, execution traces on a defective
core actually coarsely resemble those of fault-free
executions. In light of this insight, we propose a
robust and heterogeneous core coupling execution
scheme, Necromancer, that exploits a functionally dead
core to improve system throughput by supplying hints
regarding high-level program behavior. We partition the
cores in a conventional CMP system into multiple groups
in which each group shares a lightweight core that can
be substantially accelerated using these execution
hints from a potentially dead core. To prevent this
{\em undead\/} core from wandering too far from the
correct path of execution, we dynamically resynchronize
architectural state with the lightweight core. For a
4-core CMP system, on average, our approach enables the
coupled core to achieve 78.5\% of the performance of a
fully functioning core. This defect tolerance and
throughput enhancement comes at modest area and power
overheads of 5.3\% and 8.5\%, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
keywords = "execution abstraction; heterogeneous core coupling;
manufacturing defects",
}
@Article{Yan:2010:LCL,
  author          = {Yan, Guihai and Liang, Xiaoyao and Han, Yinhe and
                    Li, Xiaowei},
  title           = {Leveraging the core-level complementary effects of
                    {PVT} variations to reduce timing emergencies in
                    multi-core processors},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {485--496},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1816025},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Process, Voltage, and Temperature (PVT) variations can
                    significantly degrade the performance benefits expected
                    from next nanoscale technology. The primary circuit
                    implication of the PVT variations is the resultant
                    timing emergencies. In a multi-core processor running
                    multiple programs, variations create spatial and
                    temporal unbalance across the processing cores. Most
                    prior schemes are dedicated to tolerating PVT
                    variations individually for a single core, but ignore
                    the opportunity of leveraging the complementary effects
                    between variations and the intrinsic variation
                    unbalance among individual cores. We find that the
                    notorious delay impacts from different variations are
                    not necessary aggregated. Cores with mild variations
                    can share the violent workload from cores suffering
                    large variations. If operated correctly, variations on
                    different cores can help mitigating each other and
                    result in a variation-mild environment. In this paper,
                    we propose Timing Emergency Aware Thread Migration
                    (TEA-TM), a delay sensor-based scheme to reduce system
                    timing emergencies under PVT variations. Fourier
                    transform and frequency domain analysis are conducted
                    to provide the insights and the potential of the PVT
                    co-optimization scheme. Experimental results show on
                    average TEA-TM can help save up to 24\% throughput
                    loss, at the same time improve the system fairness by
                    85\%.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {complementary effects; delay sensor; PVT variations;
                    thread migration; timing emergency},
}
@Article{deKruijf:2010:RAF,
  author          = {de Kruijf, Marc and Nomura, Shuou and
                    Sankaralingam, Karthikeyan},
  title           = {{Relax}: an architectural framework for software
                    recovery of hardware faults},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {3},
  pages           = {497--508},
  month           = jun,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1815961.1816026},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Tue Jul 6 14:11:46 MDT 2010},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {As technology scales ever further, device
                    unreliability is creating excessive complexity for
                    hardware to maintain the illusion of perfect operation.
                    In this paper, we consider whether exposing hardware
                    fault information to software and allowing software to
                    control fault recovery simplifies hardware design and
                    helps technology scaling.\par
                    The combination of emerging applications and emerging
                    many-core architectures makes software recovery a
                    viable alternative to hardware-based fault recovery.
                    Emerging applications tend to have {\em few I/O and
                    memory side-effects}, which limits the amount of
                    information that needs checkpointing, and they allow
                    {\em discarding individual sub-computations\/} with
                    small qualitative impact. Software recovery can harness
                    these properties in ways that hardware recovery
                    cannot.\par
                    We describe Relax, an architectural framework for
                    software recovery of hardware faults. Relax includes
                    three core components: (1) an ISA extension that allows
                    software to mark regions of code for software recovery,
                    (2) a hardware organization that simplifies reliability
                    considerations and provides energy efficiency with
                    hardware recovery support removed, and (3) software
                    support for compilers and programmers to utilize the
                    Relax ISA. Applying Relax to counter the effects of
                    process variation, our results show a 20\% energy
                    efficiency improvement for PARSEC applications with
                    only minimal source code changes and simpler
                    hardware.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  keywords        = {reliability; software recovery},
}
@Article{Nuno-Maganda:2010:TCH,
  author          = {Nu{\~n}o-Maganda, Marco and Torres-Huitzil, Cesar},
  title           = {A temporal coding hardware implementation for spiking
                    neural networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {2--7},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926369},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Spiking Neural Networks (SNNs) models have been
                    explored in recent years due to its biological
                    plausibility where temporal coding plays an important
                    role. Biological arguments and computational
                    experiments suggest that some perceptual tasks (vision
                    and olfaction for instance) are well performed by these
                    models. Moreover, some other applications such as
                    machine learning might be benefited from this approach.
                    However, efficient simulation and implementation of
                    SNNs still remain an open challenge. There are several
                    issues that must be addressed, being one of them the
                    temporal coding of real-value data itself. In order to
                    study the possibilities of embedded real-time
                    implementations of large scale SNNs, we have first
                    chosen to implement a well-known coding scheme based on
                    Gaussian Receptive Fields (GRFs) to map real-value data
                    into spike trains.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Morisita:2010:IEA,
  author          = {Morisita, Hirokazu and Inakagata, Kenta and
                    Osana, Yasunori and Fujita, Naoyuki and
                    Amano, Hideharu},
  title           = {Implementation and evaluation of an arithmetic
                    pipeline on {FLOPS-$2$D}: multi-{FPGA} system},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {8--13},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926370},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {UPACS (Unified Platform for Aerospace Computational
                    Simulation) is one of the practical CFD (Computational
                    Fluid Dynamics) packages supporting various
                    selectability. A custom machine for efficient execution
                    of MUSCL; a core functions of UPACS is implemented on
                    FLOPS-2D (Flexibly Linkable Object for Programmable
                    System); multi-FPGA reconfigurable system. The deep and
                    complicated pipeline structure generated from MUSCL
                    dataflow is divided and optimized into two FPGA boards
                    by using a tuning tool called RER. With optimization of
                    the order of operations and pipeline structure, about
                    60\% utilization of the pipeline is achieved even by
                    using serial links between two boards.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tse:2010:ERD,
  author          = {Tse, Anson H. T. and Thomas, David B. and
                    Tsoi, K. H. and Luk, Wayne},
  title           = {Efficient reconfigurable design for pricing {Asian}
                    options},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {14--20},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926371},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Arithmetic Asian options are financial derivatives
                    which have the feature of path-dependency: they depend
                    on the entire price path of the underlying asset,
                    rather than just the instantaneous price. This
                    path-dependency makes them difficult to price, as only
                    computationally intensive Monte-Carlo methods can
                    provide accurate prices. This paper proposes an
                    FPGA-accelerated Asian option pricing solution, using a
                    highly-optimised parallel Monte-Carlo architecture. The
                    proposed pipelined design is described parametrically,
                    facilitating its re-use for different technologies.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Horita:2010:FBF,
  author          = {Horita, Tadayoshi and Takanami, Itsuo},
  title           = {An {FPGA}-based fast classifier with high
                    generalization property},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {21--26},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926372},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper proposes a scheme to implement classifiers
                    with high generalization properties on FPGAs. The
                    classifiers consist of only combinational logic
                    circuits, which are based on a simple concept, and the
                    VHDL source files which describe the classifiers are
                    generated by a C-language function, tuning VHDL
                    notations for adders in them to reduce both its
                    hardware size and computation time. Simulation results
                    based on a character recognition are shown in terms of
                    generalization property, hardware size, computation
                    time, and electricity consumption.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Putnam:2010:DVE,
  author          = {Putnam, Andrew and Smith, Aaron and Burger, Doug},
  title           = {Dynamic vectorization in the {E2} dynamic multicore
                    architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {27--32},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926373},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Previous research has shown that Explicit Data Graph
                    Execution (EDGE) instruction set architectures (ISA)
                    allow for power efficient performance scaling. In this
                    paper we describe the preliminary design of a new
                    dynamic multicore processor called E2 that utilizes an
                    EDGE ISA to allow for the dynamic composition of
                    physical cores into logical processors. We provide
                    details of E2's support for dynamic reconfigurability
                    and show how the EDGE ISA facilitates out-of-order
                    vector execution.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Paek:2010:BAU,
  author          = {Paek, Jong Kyung and Choi, Kiyoung and Lee, Jongeun},
  title           = {Binary acceleration using coarse-grained
                    reconfigurable architecture},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {33--39},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926374},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Coarse-grained reconfigurable architectures (CGRAs)
                    have been well-researched and shown to be particularly
                    effective in acceleration of data-intensive
                    applications. However, practical difficulties in
                    application mapping have hindered their widespread
                    adoption. Typically, an application must be modified
                    manually or by using special compilers and design tools
                    in order to fully exploit the architecture. This incurs
                    considerable design costs to the application developer
                    and reduces software portability. In this paper, we
                    propose a framework for automatic transformation of an
                    application at binary-level, with which the user can
                    execute an arbitrary application on the CGRA. Our
                    approach analyzes the binary code and determines which
                    portions of the program to accelerate, maps them to the
                    reconfigurable array, then modifies the binary code
                    appropriately to run on the CGRA.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Dohi:2010:IPE,
  author          = {Dohi, Keisuke and Shibata, Yuichiro and
                    Hamada, Tsuyoshi and Masada, Tomonari and
                    Oguri, Kiyoshi and Buell, Duncan A.},
  title           = {Implementation of a programming environment with a
                    multithread model for reconfigurable systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {40--45},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926375},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Reconfigurable systems are known to be able to achieve
                    higher performance than traditional microprocessor
                    architecture for many application fields. However, in
                    order to extract a full potential of the reconfigurable
                    systems, programmers often have to design and describe
                    the best suited code for their target architecture with
                    specialized knowledge. The aim of this paper is to
                    assist the users of reconfigurable systems by
                    implementing a translator with a multithread model. The
                    experimental results show our translator automatically
                    generates efficient performance-aware code segments
                    including DMA transfer and shift registers for memory
                    access optimization.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sabeghi:2010:RMS,
  author          = {Sabeghi, Mojtaba and Mushtaq, Hamid and Bertels, Koen},
  title           = {Runtime multitasking support on polymorphic
                    platforms},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {46--52},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926376},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {General purpose computers are moving towards employing
                    reconfigurable fabrics in order to achieve higher
                    performance. In such systems, serving several
                    applications at runtime is a challenging problem in
                    which the reconfigurable fabric has to be shared among
                    competing tasks. Because of the inherent complexity of
                    mapping the computation intensive tasks into the FPGA,
                    a comprehensive runtime system is required to address
                    all the conflicting issues between competing
                    applications' demands and to keep the system
                    performance at the required level. In this paper, we
                    present a runtime environment wherein a number of
                    components introduced to handle the task assignment
                    problem in a very low overhead manner.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tsoi:2010:PFC,
  author          = {Tsoi, Kuen Hung and Tse, Anson H. T. and
                    Pietzuch, Peter and Luk, Wayne},
  title           = {Programming framework for clusters with heterogeneous
                    accelerators},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {53--59},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926377},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {We describe a programming framework for high
                    performance clusters with various hardware
                    accelerators. In this framework, users can utilize the
                    available heterogeneous resources productively and
                    efficiently. The distributed application is highly
                    modularized to support dynamic system configuration
                    with changing types and number of the accelerators.
                    Multiple layers of communication interface are
                    introduced to reduce the overhead in both control
                    messages and data transfers. Parallelism can be
                    achieved by controlling the accelerators in various
                    schemes through scheduling extension. The framework has
                    been used to support physics simulation and financial
                    application development.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tadonki:2010:ECL,
  author          = {Tadonki, Claude and Grosdidier, Gilbert and
                    P{\`e}ne, Olivier},
  title           = {An efficient {CELL} library for lattice quantum
                    chromodynamics},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {60--65},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926378},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Quantum chromodynamics (QCD) is the theory of
                    subnuclear physics, aiming at modeling the strong
                    nuclear force, which is responsible for the
                    interactions of nuclear particles. Numerical QCD
                    studies are performed through a discrete formalism
                    called LQCD (Lattice Quantum Chromodynamics). Typical
                    simulations involve very large volume of data and
                    numerically sensitive entities, thus the crucial need
                    of high performance computing systems. We propose a set
                    of CELL-accelerated routines for basic LQCD
                    calculations. Our framework is provided as a unified
                    library and is particularly optimized for an iterative
                    use. Each routine is parallelized among the SPUs, and
                    each SPU achieves its task by looping on small chunk of
                    arrays from the main memory.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Taylor:2010:SBB,
  author          = {Taylor, Ryan and Li, Xiaoming},
  title           = {Software-based branch predication for {AMD GPUs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {66--72},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926379},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Branch predication is a program transformation
                    technique that combines instructions of multiple
                    branches of an if statement into a straight-line
                    sequence and associates each instruction of the
                    sequence with a predicate. The branch predication
                    improves the execution of branch statements on
                    processors that support predicated execution of
                    instruction, e.g., Intel IA-64, because such
                    transformation improves the instruction scheduling and
                    might help cache performance. This paper proposes a
                    novel software-based branch predication technique for
                    GPU. The main motivation is that branch instructions
                    can easily become a performance bottleneck for a GPU
                    program because of the cost of branch instructions
                    compared to ALU instructions and the possibility of low
                    ALU utilization due to separation of ALU instructions
                    within control flow blocks.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Banescu:2010:MFP,
  author          = {Banescu, Sebastian and de Dinechin, Florent and
                    Pasca, Bogdan and Tudoran, Radu},
  title           = {Multipliers for floating-point double precision and
                    beyond on {FPGAs}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {73--79},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926380},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The implementation of high-precision floating-point
                    applications on reconfigurable hardware requires large
                    multipliers. Full multipliers are the core of
                    floating-point multipliers. Truncated multipliers,
                    trading resources for a well-controlled accuracy
                    degradation, are useful building blocks in situations
                    where a full multiplier is not needed.\par
                    This work studies the automated generation of such
                    multipliers using the embedded multipliers and adders
                    present in the DSP blocks of current FPGAs. The
                    optimization of such multipliers is expressed as a
                    tiling problem, where a tile represents a hardware
                    multiplier, and super-tiles represent combinations of
                    several hardware multipliers and adders, making
                    efficient use of the DSP internal resources. This
                    tiling technique is shown to adapt to full or truncated
                    multipliers. It addresses arbitrary precisions
                    including single, double but also the quadruple
                    precision introduced by the IEEE-754-2008 standard and
                    currently unsupported by processor hardware. An
                    open-source implementation is provided in the FloPoCo
                    project.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Sano:2010:PIA,
  author          = {Sano, Kentaro and Wang, Luzhou and Yamamoto, Satoru},
  title           = {Prototype implementation of array-processor extensible
                    over multiple {FPGAs} for scalable stencil
                    computation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {80--86},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926381},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper demonstrates and evaluates the performance
                    and the scalability of the systolic
                    computational-memory array (SCMA) for stencil
                    computation, which is a typical computing kernel of
                    scientific simulation. We describe the basic
                    architecture of the SCMA, and show the requirements and
                    the design of SCMAs to scalably operate over multiple
                    devices. We implement a prototype of the SCMA with
                    three ALTERA Stratix III FPGAs, which form a
                    $ 1 \times 3 $ FPGA
                    array by connecting three DE3 boards with different
                    clock sources. The prototype SCMA demonstrates that the
                    difference in operating clock frequency hardly
                    influences the total execution cycles while it slightly
                    causes stall cycles to sub-SCMAs on different FPGAs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tsang:2010:DPR,
  author          = {Tsang, Chi-Chiu and So, Hayden Kwok-Hay},
  title           = {Dynamic power reduction of {FPGA}-based reconfigurable
                    computers using precomputation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {87--92},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926382},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper examines the effectiveness of employing
                    precomputation techniques to reduce power consumption
                    of field configurable computing systems. Multiplier is
                    modified with precomputation techniques and are
                    implemented using commercial off-the-shelf FPGAs.
                    Precomputation techniques reduce dynamic power
                    consumption of a module by eliminating unnecessary
                    signal switching activities in inactive portions of the
                    modules. Experiments have shown that up to 52\% of
                    logic and signal power consumption can be reduced in
                    multiplier module. Furthermore, when compared to ASIC
                    implementations, FPGA implementations of precomputation
                    modules have the advantage of lower area overhead as
                    most of them can be implemented using originally
                    unoccupied related FPGA resources.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2010:INb,
  author          = {Thorson, Mark},
  title           = {{Internet} nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {4},
  pages           = {93--96},
  month           = sep,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1926367.1926384},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Jan 20 14:27:03 MST 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Mukherjee:2010:NAC,
  author          = {Mukherjee, Manideepa and Sinha, Amitabha},
  title           = {A novel architecture for conversion of binary to
                    single digit double base numbers},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {5},
  pages           = {1--6},
  month           = dec,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1978907.1978909},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 13 11:25:46 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Double base number systems are increasingly attractive
                    for many compute intensive applications especially in
                    signal processing because of their capabilities of
                    handling arithmetic operations efficiently. However,
                    the complexity involved in converting binary to DBNS
                    becomes a major bottleneck and the efficiency of
                    performance goes down drastically due to the complexity
                    involved in conversion. Since complexity of multi digit
                    DBNS multiplications and additions increases with the
                    number of digits (index i,j), in this paper a novel
                    conversion scheme has been proposed where a given
                    binary number will be converted to a single digit
                    (index i,j) double base number. The proposed scheme not
                    only reduces the hardware complexity of the arithmetic
                    operations but also reduces the time of execution.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{T:2010:DDF,
  author          = {T., Shobha and Akram, Syed and Varaprasad, G.},
  title           = {Design and development of framework for diagnosing
                    intermediate nodes},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {5},
  pages           = {7--11},
  month           = dec,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1978907.1978910},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 13 11:25:46 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {A framework is an integrated system that sets the
                    rules of Automation of a specific product. This system
                    integrates the function libraries, test data sources,
                    object details and various reusable modules. This paper
                    proposes a framework, used for diagnosing and
                    performance analysis of intermediate network nodes such
                    as load balancer, routers, servers etc. For analyzing
                    the performance $m$ number of servers and $n$ number of
                    clients are considered. This framework will help
                    developers working on network nodes to check for the
                    performance of network node component and also to
                    detect the errors in the algorithms.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Tabba:2010:ACP,
  author          = {Tabba, Fuad},
  title           = {Adding concurrency in {Python} using a commercial
                    processor's hardware transactional memory support},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {5},
  pages           = {12--19},
  month           = dec,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1978907.1978911},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 13 11:25:46 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper reports on our experiences of using a
                    commercial processor's best-effort hardware
                    transactional memory to improve concurrency in CPython,
                    the reference Python implementation. CPython protects
                    its data structures using a single global lock, which
                    inhibits parallelism when running multiple
                    threads.\par
                    We modified the CPython interpreter to use best-effort
                    hardware transactions available in Sun's Rock
                    processor, and fall back on the single global lock when
                    unable to commit in hardware. The modifications were
                    minimal; however, we had to restructure some of
                    CPython's shared data structures to handle false
                    conflicts arising from CPython's management of the
                    shared data. Our results show that the modified CPython
                    interpreter can run small, simple, workloads and scale
                    almost linearly, while improving the concurrency of
                    more complex workloads.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thomasian:2010:WSD,
  author          = {Thomasian, Alexander},
  title           = {Why specialized disks for composite operations may be
                    unnecessary},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {5},
  pages           = {20--27},
  month           = dec,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1978907.1978912},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 13 11:25:46 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Disk arrays with erasure coding such as RAID5 and
                    RAID6 incur four and six disk accesses respectively for
                    updating data and check blocks. The small write penalty
                    can be reduced by the Read-Modify-Write (RMW) composite
                    operations to update data and associated check blocks.
                    The Disk Architecture with Composite Operation (DACO)
                    is a proposal to eliminate the disk rotation associated
                    with RMWs, by using a complex read/write head, which
                    allows the writing of a block immediately after reading
                    and modifying it without needing an extra disk
                    rotation. We argue that the extra cost associated with
                    DACO may not be justifiable, because it is not expected
                    to have a significant impact on RAID performance.
                    Furthermore an XOR capability is still required at the
                    disk array controller for reconstructing missing data
                    blocks. A duplexed Nonvolatile Storage (NVS) cache at
                    the disk array controller provides the same reliability
                    as magnetic disks and allows fast writes, i.e., writing
                    to disk is considered completed as soon as data is
                    written onto NVS. Deferring the destaging of data
                    blocks from NVS allows these blocks to be overwritten,
                    obviating unnecessary disk writes. This also allows
                    neighboring dirty blocks to be destaged in batches, so
                    that a higher disk access efficiency is attained. Disks
                    with multiple arms can also be used to make the
                    processing of RMW requests more efficient, while disks
                    with multiple R/W heads on one arm have little effect
                    on RMW requests. In addition there are alternative
                    methods to update check blocks, such as floating
                    parities, parity logging, the reconstruct write method,
                    log structured arrays, and variable scope parity
                    protection.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Thorson:2010:INc,
  author          = {Thorson, Mark},
  title           = {{Internet} nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {38},
  number          = {5},
  pages           = {28--36},
  month           = dec,
  year            = {2010},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1978907.1978914},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri May 13 11:25:46 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Larus:2011:CWC,
  author          = {Larus, James R.},
  title           = {The cloud will change everything},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {39},
  number          = {1},
  pages           = {1--2},
  month           = mar,
  year            = {2011},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/1961295.1950367},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Aug 18 13:45:25 MDT 2011},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Yuan:2011:ISD,
author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan
Zhou and Stefan Savage",
title = "Improving software diagnosability via log
enhancement",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "3--14",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950369",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Veeraraghavan:2011:DPS,
author = "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin
Wester and Jessica Ouyang and Peter M. Chen and Jason
Flinn and Satish Narayanasamy",
title = "{DoublePlay}: parallelizing sequential logging and
replay",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "15--26",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950370",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Casper:2011:HAT,
author = "Jared Casper and Tayo Oguntebi and Sungpack Hong and
Nathan G. Bronson and Christos Kozyrakis and Kunle
Olukotun",
title = "Hardware acceleration of transactional memory on
commodity systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "27--38",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950372",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dalessandro:2011:HNC,
author = "Luke Dalessandro and Fran{\c{c}}ois Carouge and Sean
White and Yossi Lev and Mark Moir and Michael L. Scott
and Michael F. Spear",
title = "Hybrid {NOrec}: a case study in the effectiveness of
best effort hardware transactional memory",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "39--52",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950373",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singh:2011:EPS,
author = "Abhayendra Singh and Daniel Marino and Satish
Narayanasamy and Todd Millstein and Madan Musuvathi",
title = "Efficient processor support for {DRFx}, a memory model
with exceptions",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "53--66",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950375",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Devietti:2011:RRC,
author = "Joseph Devietti and Jacob Nelson and Tom Bergan and
Luis Ceze and Dan Grossman",
title = "{RCDC}: a relaxed consistency deterministic computer",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "67--78",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950376",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Burnim:2011:SCS,
author = "Jacob Burnim and George Necula and Koushik Sen",
title = "Specifying and checking semantic atomicity for
multithreaded programs",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "79--90",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950377",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Volos:2011:MLP,
author = "Haris Volos and Andres Jaan Tack and Michael M.
Swift",
title = "{Mnemosyne}: lightweight persistent memory",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "91--104",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950379",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Coburn:2011:NHM,
author = "Joel Coburn and Adrian M. Caulfield and Ameen Akel and
Laura M. Grupp and Rajesh K. Gupta and Ranjit Jhala and
Steven Swanson",
title = "{NV-Heaps}: making persistent objects fast and safe
with next-generation, non-volatile memories",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "105--118",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950380",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Schupbach:2011:DLA,
author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy
Roscoe and Simon Peter",
title = "A declarative language approach to device
configuration",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "119--132",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950382",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ryzhyk:2011:IDD,
author = "Leonid Ryzhyk and John Keys and Balachandra Mirla and
Arun Raghunath and Mona Vij and Gernot Heiser",
title = "Improved device driver reliability through hardware
verification reuse",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "133--144",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950383",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hashmi:2011:CNI,
author = "Atif Hashmi and Andrew Nere and James Jamal Thomas and
Mikko Lipasti",
title = "A case for neuromorphic {ISAs}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "145--158",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950385",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ransford:2011:MSS,
author = "Benjamin Ransford and Jacob Sorber and Kevin Fu",
title = "{Mementos}: system support for long-running
computation on {RFID}-scale devices",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "159--170",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950386",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Koukoumidis:2011:PC,
author = "Emmanouil Koukoumidis and Dimitrios Lymberopoulos and
Karin Strauss and Jie Liu and Doug Burger",
title = "Pocket cloudlets",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "171--184",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950387",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sharma:2011:BMS,
author = "Navin Sharma and Sean Barker and David Irwin and
Prashant Shenoy",
title = "{Blink}: managing server clusters on intermittent
power",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "185--198",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950389",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hoffmann:2011:DKR,
author = "Henry Hoffmann and Stelios Sidiroglou and Michael
Carbin and Sasa Misailovic and Anant Agarwal and Martin
Rinard",
title = "Dynamic knobs for responsive power-aware computing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "199--212",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950390",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:2011:FSD,
author = "Song Liu and Karthik Pattabiraman and Thomas
Moscibroda and Benjamin G. Zorn",
title = "{Flikker}: saving {DRAM} refresh-power through
critical data partitioning",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "213--224",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950391",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Deng:2011:MAL,
author = "Qingyuan Deng and David Meisner and Luiz Ramos and
Thomas F. Wenisch and Ricardo Bianchini",
title = "{MemScale}: active low-power modes for main memory",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "225--238",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950392",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gao:2011:TMH,
author = "Qi Gao and Wenbin Zhang and Zhezhe Chen and Mai Zheng
and Feng Qin",
title = "{2ndStrike}: toward manifesting hidden concurrency
typestate bugs",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "239--250",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950394",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2011:CDC,
author = "Wei Zhang and Junghee Lim and Ramya Olichandran and
Joel Scherpelz and Guoliang Jin and Shan Lu and Thomas
Reps",
title = "{ConSeq}: detecting concurrency bugs through
sequential errors",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "251--264",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950395",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chipounov:2011:SPV,
author = "Vitaly Chipounov and Volodymyr Kuznetsov and George
Candea",
title = "{S2E}: a platform for in-vivo multi-path analysis of
software systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "265--278",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950396",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hofmann:2011:EOS,
author = "Owen S. Hofmann and Alan M. Dunn and Sangman Kim and
Indrajit Roy and Emmett Witchel",
title = "Ensuring operating system kernel integrity with
{OSck}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "279--290",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950398",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Porter:2011:RLT,
author = "Donald E. Porter and Silas Boyd-Wickizer and Jon
Howell and Reuben Olinsky and Galen C. Hunt",
title = "Rethinking the library {OS} from the top down",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "291--304",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950399",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Palix:2011:FLT,
author = "Nicolas Palix and Ga{\"e}l Thomas and Suman Saha and
Christophe Calv{\`e}s and Julia Lawall and Gilles
Muller",
title = "Faults in {Linux}: ten years later",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "305--318",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950401",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In 2001, Chou et al. published a study of faults found
by applying a static analyzer to Linux versions 1.0
through 2.4.1. A major result of their work was that
the drivers directory contained up to 7 times more of
certain kinds of faults than other directories. This
result inspired a number of development and research
efforts on improving the reliability of driver code.
Today Linux is used in a much wider range of
environments, provides a much wider range of services,
and has adopted a new development and release model.
What has been the impact of these changes on code
quality? Are drivers still a major problem?\par
To answer these questions, we have transported the
experiments of Chou et al. to Linux versions 2.6.0 to
2.6.33, released between late 2003 and early 2010. We
find that Linux has more than doubled in size during
this period, but that the number of faults per line of
code has been decreasing. And, even though drivers
still accounts for a large part of the kernel code and
contains the most faults, its fault rate is now below
that of other directories, such as arch (HAL) and fs
(file systems). These results can guide further
development and research efforts. To enable others to
continually update these results as Linux evolves, we
define our experimental protocol and make our checkers
and results available in a public archive.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Esmaeilzadeh:2011:LBL,
author = "Hadi Esmaeilzadeh and Ting Cao and Yang Xi and Stephen
M. Blackburn and Kathryn S. McKinley",
title = "Looking back on the language and hardware revolutions:
measured power, performance, and scaling",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "319--332",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950402",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nguyen:2011:SCS,
author = "Donald Nguyen and Keshav Pingali",
title = "Synthesizing concurrent schedulers for irregular
algorithms",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "333--344",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950404",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hoang:2011:ECT,
author = "Giang Hoang and Robby Bruce Findler and Russ Joseph",
title = "Exploring circuit timing-aware language and
compilation",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "345--356",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950405",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Farhad:2011:OAM,
author = "Sardar M. Farhad and Yousun Ko and Bernd Burgstaller
and Bernhard Scholz",
title = "Orchestration by approximation: mapping stream
programs onto multicore architectures",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "357--368",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950406",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhang:2011:FED,
author = "Eddy Z. Zhang and Yunlian Jiang and Ziyu Guo and Kai
Tian and Xipeng Shen",
title = "On-the-fly elimination of dynamic irregularities for
{GPU} computing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "369--380",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950408",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hormati:2011:SPS,
author = "Amir H. Hormati and Mehrzad Samadi and Mark Woh and
Trevor Mudge and Scott Mahlke",
title = "{Sponge}: portable stream programming on graphics
engines",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "381--392",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950409",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kamruzzaman:2011:ICP,
author = "Md Kamruzzaman and Steven Swanson and Dean M.
Tullsen",
title = "Inter-core prefetching for multicore processors using
migrating helper threads",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "393--404",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950411",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hayashizaki:2011:IPT,
author = "Hiroshige Hayashizaki and Peng Wu and Hiroshi Inoue
and Mauricio J. Serrano and Toshio Nakatani",
title = "Improving the performance of trace-based systems by
false loop filtering",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "1",
pages = "405--418",
month = mar,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1961295.1950412",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Aug 18 13:45:25 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Binkert:2011:GS,
author = "Nathan Binkert and Bradford Beckmann and Gabriel Black
and Steven K. Reinhardt and Ali Saidi and Arkaprava
Basu and Joel Hestness and Derek R. Hower and Tushar
Krishna and Somayeh Sardashti and Rathijit Sen and
Korey Sewell and Muhammad Shoaib and Nilay Vaish and
Mark D. Hill and David A. Wood",
title = "The {\tt gem5} simulator",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "2",
pages = "1--7",
month = may,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024716.2024718",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 1 17:35:28 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The gem5 simulation infrastructure is the merger of
the best aspects of the M5 [4] and GEMS [9] simulators.
M5 provides a highly configurable simulation framework,
multiple ISAs, and diverse CPU models. GEMS complements
these features with a detailed and flexible memory
system, including support for multiple cache coherence
protocols and interconnect models. Currently, gem5
supports most commercial ISAs (ARM, ALPHA, MIPS, Power,
SPARC, and x86), including booting Linux on three of
them (ARM, ALPHA, and x86). The project is the result
of the combined efforts of many academic and industrial
institutions, including AMD, ARM, HP, MIPS, Princeton,
MIT, and the Universities of Michigan, Texas, and
Wisconsin.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:2011:SAD,
author = "Alexander Thomasian",
title = "Survey and analysis of disk scheduling methods",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "2",
pages = "8--25",
month = may,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024716.2024719",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 1 17:35:28 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Performance of many important computer applications
depends on the performance of Hard Disk Drives (HDDs).
Disk capacities and transfer rates have been increasing
rapidly, but the improvement in disk access time is
disappointingly slow. Caching and prefetching are two
methods to alleviate this delay, which is 6--7 orders of
magnitude longer than the processor cycle time. Disk
scheduling is desirable when the data is not cached and
a disk access is required. This paper is concerned with
the analysis of two disk arm scheduling methods: SATF
(shortest access time first) which outperforms SCAN,
while both methods outperform FCFS scheduling. We
propose improvements to a recent analysis of the SCAN
policy and carry out an empirical investigation of SATF
performance to derive a relationship between the
queue-length and mean service time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{K:2011:LPT,
author = "Thimmarayaswamy K and Mary M. Dsouza and G.
Varaprasad",
title = "Low power techniques for an {Android} based phone",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "2",
pages = "26--35",
month = may,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024716.2024720",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 1 17:35:28 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Android is the latest trend in mobile operating
systems. Even though Android provides a complete set of
application, middleware and Linux kernel for the phone
applications developer, it does not fully utilize
several standard kernel features. This work attempts to
address the limitations of Android specific to power
management at kernel level and proposes possible
solutions for active and static power management in
Linux to overcome these limitations. The developed
solutions for active power management include selection
of suitable governor algorithm and modification of its
parameters and implementation of a daemon process,
which performs voltage and frequency scaling.
Application level low power techniques for Android are
also proposed to help application developers to
optimize their software.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2011:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "2",
pages = "36--52",
month = may,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024716.2024722",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 1 17:35:28 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hashmi:2011:AAF,
author = "Atif Hashmi and Hugues Berry and Olivier Temam and
Mikko Lipasti",
title = "Automatic abstraction and fault tolerance in cortical
microarchitectures",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "1--10",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000066",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Choudhary:2011:FCS,
author = "Niket K. Choudhary and Salil V. Wadhavkar and Tanmay
A. Shah and Hiran Mayukh and Jayneel Gandhi and Brandon
H. Dwiel and Sandeep Navada and Hashem H. Najaf-abadi
and Eric Rotenberg",
title = "{FabScalar}: composing synthesizable {RTL} designs of
arbitrary cores within a canonical superscalar
template",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "11--22",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000067",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gunadi:2011:CCR,
author = "Erika Gunadi and Mikko H. Lipasti",
title = "{CRIB}: consolidated rename, issue, and bypass",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "23--32",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000068",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:2011:FIF,
author = "Rishi Agarwal and Josep Torrellas",
title = "{FlexBulk}: intelligently forming atomic blocks in
blocked-execution multiprocessors to minimize
squashes",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "33--44",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000070",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kwon:2011:VPA,
author = "Youngjin Kwon and Changdae Kim and Seungryoul Maeng
and Jaehyuk Huh",
title = "Virtualizing performance asymmetric multi-core
systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "45--56",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000071",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sanchez:2011:VSE,
author = "Daniel Sanchez and Christos Kozyrakis",
title = "{Vantage}: scalable and efficient fine-grain cache
partitioning",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "57--68",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000073",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mishra:2011:ACI,
author = "Asit K. Mishra and Xiangyu Dong and Guangyu Sun and
Yuan Xie and N. Vijaykrishnan and Chita R. Das",
title = "Architecting on-chip interconnects for stacked {$3$D}
{STT-RAM} caches in {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "69--80",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000074",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gaur:2011:BIA,
author = "Jayesh Gaur and Mainak Chaudhuri and Sreenivas
Subramoney",
title = "Bypass and insertion algorithms for exclusive
last-level caches",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "81--92",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000075",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cuesta:2011:IED,
author = "Blas A. Cuesta and Alberto Ros and Mar{\'\i}a E.
G{\'o}mez and Antonio Robles and Jos{\'e} F. Duato",
title = "Increasing the effectiveness of directory caches by
deactivating coherence for private memory blocks",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "93--104",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000076",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oh:2011:TSM,
author = "Jungju Oh and Milos Prvulovic and Alenka Zajic",
title = "{TLSync}: support for multiple fast barriers using
on-chip transmission lines",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "105--116",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000078",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Crago:2011:OEM,
author = "Neal Clayton Crago and Sanjay Jeram Patel",
title = "{OUTRIDER}: efficient memory latency tolerance with
decoupled strands",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "117--128",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000079",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:2011:ETB,
author = "Yunsup Lee and Rimas Avizienis and Alex Bishara and
Richard Xia and Derek Lockhart and Christopher Batten
and Krste Asanovi{\'c}",
title = "Exploring the tradeoffs between programmability and
efficiency in data-parallel accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "129--140",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000080",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ebrahimi:2011:PAS,
author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and
Yale N. Patt",
title = "Prefetch-aware shared resource management for
multi-core systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "141--152",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000081",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agarwal:2011:RSC,
author = "Rishi Agarwal and Pranav Garg and Josep Torrellas",
title = "Rebound: scalable checkpointing for coherent shared
memory",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "153--164",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000083",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Greathouse:2011:DDS,
author = "Joseph L. Greathouse and Zhiqiang Ma and Matthew I.
Frank and Ramesh Peri and Todd Austin",
title = "Demand-driven software race detection using hardware
performance counters",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "165--176",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000084",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chhabra:2011:NSN,
author = "Siddhartha Chhabra and Yan Solihin",
title = "{i-NVMM}: a secure non-volatile main memory system
with incremental encryption",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "177--188",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000086",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tiwari:2011:CUM,
author = "Mohit Tiwari and Jason K. Oberg and Xun Li and
Jonathan Valamehr and Timothy Levin and Ben Hardekopf
and Ryan Kastner and Frederic T. Chong and Timothy
Sherwood",
title = "Crafting a usable microkernel, processor, and {I/O}
system with strict and provable information flow
security",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "189--200",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000087",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nomura:2011:SDP,
author = "Shuou Nomura and Matthew D. Sinclair and Chen-Han Ho
and Venkatraman Govindaraju and Marc de Kruijf and
Karthikeyan Sankaralingam",
title = "Sampling $+$ {DMR}: practical and low-overhead
permanent fault detection",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "201--212",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000089",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sudhakrishnan:2011:REB,
author = "Sangeetha Sudhakrishnan and Rigo Dicochea and Jose
Renau",
title = "Releasing efficient beta cores to market early",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "213--222",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000090",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Manoochehri:2011:CCP,
author = "Mehrtash Manoochehri and Murali Annavaram and Michel
Dubois",
title = "{CPPC}: correctable parity protected cache",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "223--234",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000091",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gebhart:2011:EEM,
author = "Mark Gebhart and Daniel R. Johnson and David Tarjan
and Stephen W. Keckler and William J. Dally and Erik
Lindholm and Kevin Skadron",
title = "Energy-efficient mechanisms for managing thread
context in throughput processors",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "235--246",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000093",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yu:2011:SDH,
author = "Wing-kei S. Yu and Ruirui Huang and Sarah Q. Xu and
Sung-En Wang and Edwin Kan and G. Edward Suh",
title = "{SRAM--DRAM} hybrid memory with applications to
efficient register files in fine-grained
multi-threading",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "247--258",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000094",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fu:2011:ATM,
author = "Binzhang Fu and Yinhe Han and Jun Ma and Huawei Li and
Xiaowei Li",
title = "An abacus turn model for time\slash space-efficient
reconfigurable routing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "259--270",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000096",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Carpenter:2011:CGS,
author = "Aaron Carpenter and Jianyun Hu and Jie Xu and Michael
Huang and Hui Wu",
title = "A case for globally shared-medium on-chip
interconnect",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "271--282",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000097",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tang:2011:IMS,
author = "Lingjia Tang and Jason Mars and Neil Vachharajani and
Robert Hundt and Mary Lou Soffa",
title = "The impact of memory subsystem resource sharing on
datacenter applications",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "283--294",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000099",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yoon:2011:AGM,
author = "Doe Hyun Yoon and Min Kyu Jeong and Mattan Erez",
title = "Adaptive granularity memory systems: a tradeoff
between storage efficiency and throughput",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "295--306",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000100",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Barr:2011:SMS,
author = "Thomas W. Barr and Alan L. Cox and Scott Rixner",
title = "{SpecTLB}: a mechanism for speculative address
translation",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "307--318",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000101",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Meisner:2011:PMO,
author = "David Meisner and Christopher M. Sadler and Luiz
Andr{\'e} Barroso and Wolf-Dietrich Weber and Thomas F.
Wenisch",
title = "Power management of online data-intensive services",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "319--330",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000103",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Biswas:2011:FFF,
author = "Susmit Biswas and Mohit Tiwari and Timothy Sherwood
and Luke Theogarajan and Frederic T. Chong",
title = "Fighting fire with fire: modeling the datacenter-scale
effects of targeted superlattice thermal management",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "331--340",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000104",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Govindan:2011:BLT,
author = "Sriram Govindan and Anand Sivasubramaniam and Bhuvan
Urgaonkar",
title = "Benefits and limitations of tapping into stored energy
for datacenters",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "341--352",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000105",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Demme:2011:RIA,
author = "John Demme and Simha Sethumadhavan",
title = "Rapid identification of architectural bottlenecks via
precise event counting",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "353--364",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000107",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Esmaeilzadeh:2011:DSE,
author = "Hadi Esmaeilzadeh and Emily Blem and Renee {St. Amant}
and Karthikeyan Sankaralingam and Doug Burger",
title = "Dark silicon and the end of multicore scaling",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "365--376",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000108",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sun:2011:MME,
author = "Guangyu Sun and Christopher J. Hughes and Changkyu Kim
and Jishen Zhao and Cong Xu and Yuan Xie and Yen-Kuang
Chen",
title = "{Moguls}: a model to explore the memory hierarchy for
bandwidth improvements",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "377--388",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000109",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mishra:2011:CHC,
author = "Asit K. Mishra and N. Vijaykrishnan and Chita R. Das",
title = "A case for heterogeneous on-chip interconnects for
{CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "389--400",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000111",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Grot:2011:KNH,
author = "Boris Grot and Joel Hestness and Stephen W. Keckler
and Onur Mutlu",
title = "{Kilo-NOC}: a heterogeneous network-on-chip
architecture for scalability and service guarantees",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "401--412",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000112",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ma:2011:DER,
author = "Sheng Ma and Natalie Enright Jerger and Zhiying Wang",
title = "{DBAR}: an efficient routing algorithm to support
multiple concurrent applications in networks-on-chip",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "413--424",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000113",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Udipi:2011:CMC,
author = "Aniruddha N. Udipi and Naveen Muralimanohar and Rajeev
Balasubramonian and Al Davis and Norman P. Jouppi",
title = "Combining memory and a controller with photonics
through {$3$D}-stacking to enable scalable and
energy-efficient systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "425--436",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000115",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Binkert:2011:ROF,
author = "Nathan Binkert and Al Davis and Norman P. Jouppi and
Moray McLaren and Naveen Muralimanohar and Robert
Schreiber and Jung Ho Ahn",
title = "The role of optics in future high radix switch
design",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "437--448",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000116",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ma:2011:SPC,
author = "Kai Ma and Xue Li and Ming Chen and Xiaorui Wang",
title = "Scalable power control for many-core architectures
running multi-threaded applications",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "449--460",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000117",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Alameldeen:2011:EEC,
author = "Alaa R. Alameldeen and Ilya Wagner and Zeshan Chishti
and Wei Wu and Chris Wilkerson and Shih-Lien Lu",
title = "Energy-efficient cache design using variable-strength
error-correcting codes",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "461--472",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2000118",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Barroso:2011:WSC,
author = "Luiz Andr{\'e} Barroso",
title = "Warehouse-Scale Computing: Entering the Teenage
Decade",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "??--??",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2019527",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ferrucci:2011:IWD,
author = "David A. Ferrucci",
title = "{IBM}'s {Watson\slash DeepQA}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "??--??",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2019525",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kannan:2011:ARH,
author = "Ravi Kannan",
title = "Algorithms: Recent Highlights and Challenges",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "3",
pages = "??--??",
month = jun,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2024723.2019526",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 5 17:15:11 MDT 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Leeser:2011:CWP,
author = "Miriam Leeser and Devon Yablonski and Dana Brooks and
Laurie Smith King",
title = "The challenges of writing portable, correct and high
performance libraries for {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "2--7",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082158",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Graphics Processing Units (GPUs) are widely used to
accelerate scientific applications. Many successes have
been reported with speedups of two or three orders of
magnitude over serial implementations of the same
algorithms. These speedups typically pertain to a
specific implementation with fixed parameters mapped to
a specific hardware implementation. The implementations
are not designed to be easily ported to other GPUs,
even from the same manufacturer. When target hardware
changes, the application must be re-optimized. In this
paper we address a different problem. We aim to deliver
working, efficient GPU code in a library that is
downloaded and run by many different users.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tsoi:2011:PPO,
author = "Kuen Hung Tsoi and Wayne Luk",
title = "Power profiling and optimization for heterogeneous
multi-core systems",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "8--13",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082159",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Processing speed and energy efficiency are two of the
most critical issues for computer systems. This paper
presents a systematic approach for profiling the power
and performance characteristics of application
targeting heterogeneous multi-core computing platforms.
Our approach enables rapid and automated design space
exploration involving optimisation of workload
distribution for systems with accelerators such as
FPGAs and GPUs. We demonstrate that, with minor
modification to the design, it is possible to estimate
performance and power efficiency trade off to identify
optimized workload distribution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Georgescu:2011:GAC,
author = "Serban Georgescu and Peter Chow",
title = "{GPU} accelerated {CAE} using open solvers and the
cloud",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "14--19",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082161",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "After more than five years since GPUs were first used
as accelerators for general scientific computations,
the field of General Purpose GPU computing or GPGPU has
finally reached mainstream. Developers have now access
to a mature hardware and software ecosystem. On the
software side, several major open-source packages now
support GPU acceleration while on the hardware side
cloud-based solutions provide a simple way to access
powerful machines with the latest GPUs at low cost. In
this context, we look at the GPU acceleration of CAE,
with a focus on the matrix solvers. We compare the
performance that can be achieved using the open-source
solver package PETSc ran on GPU-enabled Amazon EC2
hardware with that of an optimized legacy FEM code ran
on a last generation 12-core blade server.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:2011:DSE,
author = "Junying Chen and Billy Y. S. Yiu and Brandon K.
Hamilton and Alfred C. H. Yu and Hayden K.-H. So",
title = "Design space exploration of adaptive beamforming
acceleration for bedside and portable medical
ultrasound imaging",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "20--25",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082162",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The use of adaptive beamforming is a viable solution
to provide high-resolution real-time medical ultrasound
imaging. However, the increase in image resolution
comes at an expense of a significant increase in
compute requirement over conventional algorithms. In a
bedside diagnosis setting where plug-in power is
available, GPUs are promising accelerators to address
the processing demand. However, in the case of
point-of-care diagnostics where portable ultrasound
imaging devices must be used, alternative
power-efficient computer systems must be employed,
possibly at the expense of lower image resolution in
order to maintain real-time performance. This paper
presents an initial design space exploration on viable
compute architectures that might address the
drastically different requirements between bedside and
portable medical ultrasound imaging systems using
adaptive beamforming.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Dohi:2011:GIO,
author = "Keisuke Dohi and Yuichiro Shibata and Kiyoshi Oguri
and Takafumi Fujimoto",
title = "{GPU} implementation and optimization of
electromagnetic simulation using the {FDTD} method for
antenna designing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "26--31",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082163",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper describes electromagnetical field
simulation using the 3D-FDTD method for antenna
designing on a CUDA compatible GPU. We use the Split
Perfectly Matched Layer as an absorbing boundary
condition. As is well known, the 3D-FDTD method is a
kind of stencil computation and is considered better at
GPU implementation. In order to find the best blocking
size for the target GPU architecture, we empirically
explore a design space of blocking size. We also
propose a kernel fusing method as one of the efficient
optimization methods, which improves the total
performance about 10\% at the cost of a small increase
in memory usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nagatsuka:2011:CER,
author = "Tomoyuki Nagatsuka and Yoshito Sakaguchi and Takayuki
Matsumura and Kenji Kise",
title = "{CoreSymphony}: an efficient reconfigurable multi-core
architecture",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "32--37",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082165",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper describes CoreSymphony, a cooperative and
reconfigurable superscalar processor architecture that
improves single-thread performance in chip
multiprocessor. CoreSymphony enables some narrow-issue
cores to be fused into a single wide-issue core. In
this paper, we describe the problems associated with
achieving the cooperative superscalar processor. We
then describe techniques by which to overcome these
problems. The evaluation results obtained using
SPEC2006 benchmarks indicate that four-core fusion
achieves 88\% higher IPC than an individual core.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Takamaeda-Yamazaki:2011:FBS,
author = "Shinya Takamaeda-Yamazaki and Ryosuke Sasakawa and
Yoshito Sakaguchi and Kenji Kise",
title = "An {FPGA}-based scalable simulation accelerator for
tile architectures",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "38--43",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082166",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "FPGA-based simulation systems can simulate processor
behavior in realistic time. In order to practically
simulate tile many-core architectures, we propose
ScalableCore for prototyping system development using
multiple FPGAs. In this paper, we present an FPGA-based
platform called ScalableCore system 1.1, which consists
of several simulation tiles named ScalableCore Units.
Each tile is connected to four neighbor tiles via
interface boards called ScalableCore Boards, and so
increasing the target number of cores is easy. We also
describe useful techniques by which to achieve high
scalability of simulation and to implement complicated
hardware functions on an FPGA. The developed system
simulates the behavior of a tile architecture with DMA
communications and NoC 14.2 times faster than a
corresponding software-based functional simulator
running on a standard computer with an Intel Core2Duo
processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sano:2011:DSP,
author = "Kentaro Sano and Satoru Yamamoto and Yoshiaki
Hatsuda",
title = "Domain-specific programmable design of scalable
streaming-array for power-efficient stencil
computation",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "44--49",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082168",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents the domain-specific programmable
design of custom computing machines for
high-performance stencil computation. Stencil
computation is one of the typical kernels in scientific
computations, however its low operational-intensity
makes the sustained performance limited by memory
bandwidth on recent microprocessors and GPUs. So far we
have proposed a scalable streaming-array (SSA) of
processing elements, which provides almost linear
scalability by increasing FPGAs with a constant
external memory bandwidth. In order to facilitate custom
computing and efficiently utilize hardware resources
for various and complex stencil-computations, we design
programmable SSA with limited but necessary
functionality. We show the design concept, the
programmable structure and the SIMD instruction set for
SSA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Akamine:2011:IOE,
author = "Takayuki Akamine and Kenta Inakagata and Yasunori
Osana and Naoyuki Fujita and Hideharu Amano",
title = "An implementation of out-of-order execution system for
acceleration of computational fluid dynamics on
{FPGAs}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "50--55",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082169",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "CFD is an important tool for designing aircraft
components. FaSTAR is one of the most recent CFD
program package with various solvers and automatic
generation of grid data. However, FaSTAR is difficult
to be executed in parallel machines because of its
irregular data structure. Here, the surface integral
module, one of cores of FaSTAR is implemented in an
FPGA for future acceleration using a platform FLOPS-2D.
However, even with hardware execution, the pipeline
module suffers from frequent stalls caused by irregular
and successive memory access. In order to rid of the
problem, a data controller for Out-Of-Order execution
was designed and implemented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:2011:EAH,
author = "Haisheng Liu and Smail Niar and Yassin El-Hillali and
Atika Rivenq",
title = "Embedded architecture with hardware accelerator for
target recognition in driver assistance system",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "56--59",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082170",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a new Radar-based recognition
system, which is able to identify obstacles during a
vehicle movement. Obstacles recognition gives the
benefits of avoiding false alarms and allows generating
alarms that take into account the identification of the
obstacle in front of the vehicle. In this paper, we
first identify hotspots in the target recognition
application. Then, we propose an optimized version of
the multiple target recognition algorithm to respect
the real time constraints of the application while
simplifying the underlying hardware platform. We also
propose a flexible embedded architecture with hardware
accelerator that supports the proposed algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pell:2011:SEF,
author = "Oliver Pell and Oskar Mencer",
title = "Surviving the end of frequency scaling with
reconfigurable dataflow computing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "60--65",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082172",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Over the past decade x86 processors have come to
dominate the world's largest supercomputers. However in
the future conventional multicore processors are
unlikely to be able to deliver the necessary
performance per \$ and per W to achieve exascale
performance. Heterogeneous computing is emerging as a
powerful alternative to conventional multi-core to help
address these challenges. In this paper we describe our
approach to Maximum Performance Computing --- building
application-specific computers which complement
conventional x86 processors with high performance
dataflow engines implemented on FPGA to provide
10--100$ \times $ improvements in performance and
performance/W.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Balevic:2011:KAD,
author = "Ana Balevic and Bart Kienhuis",
title = "{KPN2GPU}: an approach for discovery and exploitation
of fine-grain data parallelism in process networks",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "66--71",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082173",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With advances in manycore and accelerator
architectures, the high performance and embedded spaces
are rapidly converging. Emerging architectures feature
different forms of parallelism. The Polyhedral
Processes Networks (PPNs) are a proven model of choice
for automated generation of pipeline and task parallel
programs from sequential source code, however data
parallelism is not addressed. In this paper, we present
a systematic approach for identification and extraction
of fine grain data parallelism from the PPN
specification. The approach is implemented in a tool,
called kpn2gpu, which produces fine-grain data parallel
CUDA kernels for graphics processing units (GPUs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Akagic:2011:HSC,
author = "Amila Akagi{\'c} and Hideharu Amano",
title = "High speed {CRC} with 64-bit generator polynomial on
an {FPGA}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "72--77",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082175",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Deployment of jumbo frame sizes beyond 9000 bytes for
storage systems is limited by 32-bit Cyclic Redundancy
Checks used by a network protocol. In order to overcome
this limitation we study possibility of using 64-bit
polynomials in software and hardware, by using fastest
multiple lookup tables algorithms for generating CRCs.
CRC is a sequential process, thus the software based
solutions are limited in throughput by speed and
architectural improvements of a single CPU. We study
tradeoff between using distributed LUTs and embedded
BRAM in hardware implementations. Our results show that
BRAM-based approach is the fastest hardware
implementation, reaching maximum of 347.37 Gbps while
processing 1024 bits at a time, which is 606x faster
than the software implementation of the same algorithm
running on Xeon 3.2 GHz with 2 MB of L2 cache.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yang:2011:BPR,
author = "Shufan Yang and T. M. McGinnity",
title = "A biologically plausible real-time spiking neuron
simulation environment based on a multiple-{FPGA}
platform",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "78--81",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082176",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Neurological research has revealed that neurons encode
information in the timing of spikes. Spiking neural
network simulations are a flexible and powerful method
for investigating the behaviour of such neuronal
systems. The spiking neuron models which are used in
simulations can be described mathematically, but the
continuous time involved in mathematical models needs
to be replaced by discrete time steps. An alternative
approach, hardware implementation, provides the
possibility of generating independent spikes precisely
and simultaneously output spike waves in real
biological time, under the premise that the spiking
neural network implemented in hardware can take full
advantage of hardware-timed speed and reliability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sawada:2011:PCW,
author = "Hiroomi Sawada and Morihiro Kuga and Motoki Amagasaki
and Masahiro Iida and Toshinori Sueyoshi",
title = "Parallelization of the channel width search for {FPGA}
routing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "82--85",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082177",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanabe:2011:SFB,
author = "Shoji Tanabe and Takuya Nagashima and Yoshiki
Yamaguchi",
title = "A study of an {FPGA} based flexible {SIMD} processor",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "86--89",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082179",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Trouve:2011:ADA,
author = "Antoine Trouve and Kazuaki Murakami",
title = "Augmenting {DR-ASIP} flexibility through multi-mode
custom instructions",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "90--93",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082180",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper introduces a simple method called multimode
custom instructions, which aims at reducing the power
consumption of the register file of tightly coupled
dynamically reconfigurable application specific
instruction set processors (DR-ASIPs). To this end, it
proposes to divide custom instructions into two sets
depending on criteria related to their size,
distribution and reuse rate. Performance is measured on
a RISC DR-ASIP with a subset of MiBench using an
original automatic custom instruction generator from
assembly based on the dancing link algorithm.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kubota:2011:MWS,
author = "Shinya Kubota and Minoru Watanabe",
title = "A {MEMS} writer system embedded for a programmable
optically reconfigurable gate array",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "94--97",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082181",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fousek:2011:AFC,
author = "Jan Fousek and Ji{\v{r}}i Filipovi{\v{c}} and
Matu{\v{s}} Madzin",
title = "Automatic fusions of {CUDA--GPU} kernels for parallel
map",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "98--99",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082183",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "When implementing a function mapping on the
contemporary GPU, several contradictory performance
factors affecting distribution of computation into GPU
kernels have to be balanced. A decomposition-fusion
scheme suggests to decompose the computational problem
to be solved by several simple functions implemented as
standalone kernels and to fuse some of these functions
later into more complex kernels to improve memory
locality. In this paper, a prototype of
source-to-source compiler automating the fusion phase
is presented and the impact of fusions generated by the
compiler as well as compiler efficiency is
experimentally evaluated.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Matsunobu:2011:DCE,
author = "Kohei Matsunobu and Keisuke Dohi and Yuichiro Shibata
and Kiyoshi Oguri",
title = "A discussion on calculating eigenvalues of real
symmetric tridiagonal matrices on a {GPU}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "100--101",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082184",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "While GPUs are attracting attention as an accelerator
in wide-ranged application areas, compatibility between
the architecture and selected algorithm is important to
effectively bring out their potential performance. This
paper focuses on eigenvalue calculation from a given
real symmetric tridiagonal matrix and compares GPU
implementations for the QR method and the bisection
method. Implementation for a total of four different
GPU architectures are shown and compared to reveal the
affinity between algorithms and architectures.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Meyer:2011:MRP,
author = "Dominik Meyer and Bernd Klauer",
title = "Multicore reconfiguration platform an alternative to
{RAMPSoC}",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "102--103",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082185",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The current state of the art in processor performance
improvement is multicore-processor systems. These
systems offer a number of homogeneous and static
processor cores for the parallel distribution of
computational tasks. A novel idea in this research
field is introduced by the Runtime Adaptive
Multi-Processor System-on-Chip (RAMPSoC) approach. It
uses a dynamic and partial reconfigurable system to
offer a heterogeneous multicore-processor system. It is
runtime adaptable to applications needs and provides a
high degree of freedom for system design and task
distribution. The continuation of this idea is the
Multicore Reconfiguration Platform (MRP) presented in
this paper.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bonamy:2011:PLI,
author = "Robin Bonamy and Daniel Chillet and Olivier Sentieys
and Sebastien Bilavarn",
title = "Parallelism Level Impact on Energy Consumption in
Reconfigurable Devices",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "104--105",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082186",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Nowadays, System-on-Chip architectures are composed of
several execution resources which support complex
applications. As it shares silicon area and limits the
cost of the global circuit, the embedding of a
reconfigurable resource in these SoC provides
flexibility to the hardware. In this case, several
implementations of the same algorithm, offering
different characteristics, can be considered in order
to optimize performances. In general, the tasks mapped
on reconfigurable resources are algorithms that can be
defined through several levels of parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Agyeman:2011:PAO,
author = "Michael Opoku Agyeman and Ali Ahmadinia",
title = "Power and area optimisation in heterogeneous {$3$D}
networks-on-chip architectures",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "106--107",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082187",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Three dimensional Network-on-Chip (3D NoC)
architectures have evolved with a lot of interest to
address the on-chip communication delays of modern SoC
systems. However, the vertical interconnections between
layers is more power and area hungry compared to 2D
interconnections. In this paper we propose area
efficient and low power heterogeneous NoC
architectures, which combines both the power and
performance benefits of 2D routers and 3D NoC-bus
hybrid router architectures in 3D mesh topologies.
Experimental results show a negligible penalty of up to
5\% in average packet latency of 3D homogeneous NoC
with bus hybrid routers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2011:INb,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "108--117",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082189",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Das:2011:HSR,
author = "Malay Das and Amitabha Sinha and Nishant Kumar Giri",
title = "High speed residue number system ({RNS}) based {FIR}
filter using distributed arithmetic ({DA})",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "5",
pages = "1--4",
month = dec,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2093339.2093341",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Mar 15 14:07:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper high speed Residue Number System (RNS)
based FIR filter using Distributed Arithmetic (DA) is
proposed. The proposed architecture uses the module set
having the value of numbers as small as possible. In
case of using Distributed Arithmetic in FIR filter; the
size of LUTs gets increased exponentially with the
increase of tap of the filter. Here care has been taken
so that sizes of LUTs do not get increased. The
proposed architecture is designed using Verilog HDL; a
popular hardware description language [9]. The design
is synthesized with ISE 10.1 and implemented on
Xilinx's Virtex-4. The proposed architecture is also
compared with conventional RNS-DA FIR filter. The
results show that the proposed architecture can
implement FIR filter with high speed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chakraborty:2011:CBS,
author = "Anindita Chakraborty and Amitabha Sinha",
title = "Conversion of binary to single-term triple base
numbers for {DSP} applications",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "5",
pages = "5--11",
month = dec,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2093339.2093342",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Mar 15 14:07:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Non-binary number systems are increasingly gaining
popularity in signal processing applications for their
capabilities of handling arithmetic operations
efficiently. One such number system, ``Double Base
Number System (DBNS)'' has gained attention to many
researchers for it's capability of performing
multiplication operation efficiently. Recently,
``Triple Base Number System (TBNS)'' has been
introduced which shows better performance over DBNS for
higher bit operations in terms of speed, hardware
complexity and power dissipation. However, the
advantages of TBNS systems cannot be exploited due to
substantial overhead of conversion from binary to TBNS.
Keeping this issue in view, in this paper, a novel
architecture has been proposed for high performance
binary to TBNS conversion. Efficiency of this
conversion scheme has been dealt with in details and
experimental results and analysis clearly indicate the
novelty of the architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singha:2011:NAF,
author = "Satrughna Singha and Aniruddha Ghosh and Amitabha
Sinha",
title = "A new architecture for {FPGA} based implementation of
conversion of binary to double base number system
({DBNS}) using parallel search technique",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "5",
pages = "12--18",
month = dec,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2093339.2093343",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Mar 15 14:07:10 MDT 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Compute intensive signal Processing Algorithms demand
efficient execution of high performance arithmetic
operations. Since, double base number system (DBNS)
offers high performance arithmetic units, it is gaining
attention to many researchers. However, the advantage
of DBNS can not be exploited due to large conversion
time from binary to DBNS. Keeping this issue in view,
this paper presents a novel conversion scheme using
parallel search technique.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2011:INc,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "5",
pages = "19--23",
month = dec,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2093339.2093345",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Mar 15 14:07:10 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lymberopoulos:2012:PIW,
author = "Dimitrios Lymberopoulos and Oriana Riva and Karin
Strauss and Akshay Mittal and Alexandros Ntoulas",
title = "{PocketWeb}: instant web browsing for mobile devices",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "1--12",
month = mar,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2189750.2150978",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "The high network latencies and limited battery life of
mobile phones can make mobile web browsing a
frustrating experience. In prior work, we proposed
trading memory capacity for lower web access latency
and a more convenient data transfer schedule from an
energy perspective by prefetching slowly-changing data
(search queries and results) nightly, when the phone is
charging. However, most web content is intrinsically
much more dynamic and may be updated multiple times a
day, thus eliminating the effectiveness of periodic
updates. This paper addresses the challenge of
prefetching dynamic web content in a timely fashion,
giving the user an instant web browsing experience but
without aggravating the battery lifetime issue. We
start by analyzing the web access traces of 8,000
users, and observe that mobile web browsing exhibits a
strong spatiotemporal signature, which is different for
every user. We propose to use a machine learning
approach based on stochastic gradient boosting
techniques to efficiently model this signature on a per
user basis. The machine learning model is capable of
accurately predicting future web accesses and
prefetching the content in a timely manner. Our
experimental evaluation with 48,000 models trained on
real user datasets shows that we can accurately
prefetch 60\% of the URLs for about 80--90\% of the
users within 2 minutes before the request. The system
prototype we built not only provides more than 80\%
lower web access time for more than 80\% of the users,
but it also achieves the same or lower radio energy
dissipation by more than 50\% for the majority of
mobile users.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:2012:RUL,
author = "Felix Xiaozhu Lin and Zhen Wang and Robert LiKamWa and
Lin Zhong",
title = "{Reflex}: using low-power processors in smartphones
without knowing them",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "13--24",
month = mar,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2189750.2150979",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "To accomplish frequent, simple tasks with high
efficiency, it is necessary to leverage low-power,
microcontroller-like processors that are increasingly
available on mobile systems. However, existing
solutions require developers to directly program the
low-power processors and carefully manage
inter-processor communication. We present Reflex, a
suite of compiler and runtime techniques that
significantly lower the barrier for developers to
leverage such low-power processors. The heart of Reflex
is a software Distributed Shared Memory (DSM) that
enables shared memory objects with release consistency
among code running on loosely coupled processors. In
order to achieve high energy efficiency without
sacrificing performance much, the Reflex DSM leverages
(i) extreme architectural asymmetry between low-power
processors and powerful central processors, (ii)
aggressive compile-time optimization, and (iii) a
minimalist runtime that supports efficient message
passing and event-driven execution. We report a
complete realization of Reflex that runs on a TI
OMAP4430-based development platform as well as on a
custom tri-processor mobile platform. Using smartphone
sensing applications reported in recent literature, we
show that Reflex supports a programming style very
close to contemporary smartphone programming. Compared
to message passing, the Reflex DSM greatly reduces
efforts in programming heterogeneous smartphones,
eliminating up to 38\% of the source lines of
application code. Compared to running the same
applications on existing smartphones, Reflex reduces
the average system power consumption by up to 81\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chang:2012:TGE,
author = "Jichuan Chang and Justin Meza and Parthasarathy
Ranganathan and Amip Shah and Rocky Shih and Cullen
Bash",
title = "Totally green: evaluating and designing servers for
lifecycle environmental impact",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "25--36",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2150980",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "The environmental impact of servers and datacenters is
an important future challenge. System architects have
traditionally focused on operational energy as a proxy
for designing green servers, but this ignores important
environmental implications from server production
(materials, manufacturing, etc.). In contrast, this
paper argues for a lifecycle focus on the environmental
impact of future server designs, to include both
operation and production. We present a new methodology
to quantify the total environmental impact of system
design decisions. Our approach uses the thermodynamic
metric of exergy consumption, adapted and validated for
use by system architects. Using this methodology, we
evaluate the lifecycle impact of several example system
designs with environment-friendly optimizations. Our
results show that environmental impact from production
can be important (around 20\% on current servers and
growing) and system design choices can reduce this
component (by 30--40\%). Our results also highlight
several, sometimes unexpected, cross-interactions
between the environmental impact of production and
operation that further motivate a total lifecycle
emphasis for future green server designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@article{Ferdman:2012:CCS,
  author = {Michael Ferdman and Almutaz Adileh and Onur Kocberber
    and Stavros Volos and Mohammad Alisafaee and Djordje
    Jevdjic and Cansu Kaynak and Adrian Daniel Popescu and
    Anastasia Ailamaki and Babak Falsafi},
  title = {Clearing the clouds: a study of emerging scale-out
    workloads on modern hardware},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {37--48},
  doi = {https://doi.org/10.1145/2189750.2150982},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Emerging scale-out workloads require extensive amounts
    of computational resources. However, data centers using
    modern server hardware face physical constraints in
    space and power, limiting further expansion and calling
    for improvements in the computational density per
    server and in the per-operation energy. Continuing to
    improve the computational resources of the cloud while
    staying within physical constraints mandates optimizing
    server efficiency to ensure that server hardware
    closely matches the needs of scale-out workloads. In
    this work, we introduce CloudSuite, a benchmark suite
    of emerging scale-out workloads. We use performance
    counters on modern servers to study scale-out
    workloads, finding that today's predominant processor
    micro-architecture is inefficient for running these
    workloads. We find that inefficiency comes from the
    mismatch between the workload needs and modern
    processors, particularly in the organization of
    instruction and data memory systems and the processor
    core micro-architecture. Moreover, while today's
    predominant micro-architecture is inefficient when
    executing scale-out workloads, we find that continuing
    the current trends will further exacerbate the
    inefficiency in the future. In this work, we identify
    the key micro-architectural needs of scale-out
    workloads, calling for a change in the trajectory of
    server processors that would lead to improved
    computational density and power efficiency in data
    centers.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Chen:2012:IOD,
  author = {Yang Chen and Shuangde Fang and Lieven Eeckhout and
    Olivier Temam and Chengyong Wu},
  title = {Iterative optimization for the data center},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {49--60},
  doi = {https://doi.org/10.1145/2189750.2150983},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Iterative optimization is a simple but powerful
    approach that searches for the best possible
    combination of compiler optimizations for a given
    workload. However, each program, if not each data set,
    potentially favors a different combination. As a
    result, iterative optimization is plagued by several
    practical issues that prevent it from being widely used
    in practice: a large number of runs are required for
    finding the best combination; the process can be data
    set dependent; and the exploration process incurs
    significant overhead that needs to be compensated for
    by performance benefits. Therefore, while iterative
    optimization has been shown to have significant
    performance potential, it is seldomly used in
    production compilers. In this paper, we propose
    Iterative Optimization for the Data Center (IODC): we
    show that servers and data centers offer a context in
    which all of the above hurdles can be overcome. The
    basic idea is to spawn different combinations across
    workers and recollect performance statistics at the
    master, which then evolves to the optimum combination
    of compiler optimizations. IODC carefully manages costs
    and benefits, and is transparent to the end user. We
    evaluate IODC using both MapReduce and throughput
    compute-intensive server applications. In order to
    reflect the large number of users interacting with the
    system, we gather a very large collection of data sets
    (at least 1000 and up to several million unique data
    sets per program), for a total storage of 10.7TB, and
    568 days of CPU time. We report an average performance
    improvement of 1.48$ \times $, and up to 2.08$ \times
    $, for the MapReduce applications, and 1.14$ \times $,
    and up to 1.39$ \times $, for the throughput
    compute-intensive server applications.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Ahmad:2012:TOM,
  author = {Faraz Ahmad and Srimat T. Chakradhar and Anand
    Raghunathan and T. N. Vijaykumar},
  title = {{Tarazu}: optimizing {MapReduce} on heterogeneous
    clusters},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {61--74},
  doi = {https://doi.org/10.1145/2189750.2150984},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Data center-scale clusters are evolving towards
    heterogeneous hardware for power, cost, differentiated
    price-performance, and other reasons. MapReduce is a
    well-known programming model to process large amount of
    data on data center-scale clusters. Most MapReduce
    implementations have been designed and optimized for
    homogeneous clusters. Unfortunately, these
    implementations perform poorly on heterogeneous
    clusters (e.g., on a 90-node cluster that contains 10
    Xeon-based servers and 80 Atom-based servers, Hadoop
    performs worse than on 10-node Xeon-only or 80-node
    Atom-only homogeneous sub-clusters for many of our
    benchmarks). This poor performance remains despite
    previously proposed optimizations related to management
    of straggler tasks. In this paper, we address
    MapReduce's poor performance on heterogeneous clusters.
    Our first contribution is that the poor performance is
    due to two key factors: (1) the non-intuitive effect
    that MapReduce's built-in load balancing results in
    excessive and bursty network communication during the
    Map phase, and (2) the intuitive effect that the
    heterogeneity amplifies load imbalance in the Reduce
    computation. Our second contribution is Tarazu, a suite
    of optimizations to improve MapReduce performance on
    heterogeneous clusters. Tarazu consists of (1)
    Communication-Aware Load Balancing of Map computation
    (CALB) across the nodes, (2) Communication-Aware
    Scheduling of Map computation (CAS) to avoid bursty
    network traffic and (3) Predictive Load Balancing of
    Reduce computation (PLB) across the nodes. Using the
    above 90-node cluster, we show that Tarazu
    significantly improves performance over a baseline of
    Hadoop with straightforward tuning for hardware
    heterogeneity.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Govindan:2012:LSE,
  author = {Sriram Govindan and Di Wang and Anand Sivasubramaniam
    and Bhuvan Urgaonkar},
  title = {Leveraging stored energy for handling power
    emergencies in aggressively provisioned datacenters},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {75--86},
  doi = {https://doi.org/10.1145/2189750.2150985},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Datacenters spend \$10--25 per watt in provisioning
    their power infrastructure, regardless of the watts
    actually consumed. Since peak power needs arise rarely,
    provisioning power infrastructure for them can be
    expensive. One can, thus, aggressively under-provision
    infrastructure assuming that simultaneous peak draw
    across all equipment will happen rarely. The resulting
    non-zero probability of emergency events where power
    needs exceed provisioned capacity, however small,
    mandates graceful reaction mechanisms to cap the power
    draw instead of leaving it to disruptive circuit
    breakers/fuses. Existing strategies for power capping
    use temporal knobs local to a server that throttle the
    rate of execution (using power modes), and/or spatial
    knobs that redirect/migrate excess load to regions of
    the datacenter with more power headroom. We show these
    mechanisms to have performance degrading ramifications,
    and propose an entirely orthogonal solution that
    leverages existing UPS batteries to temporarily augment
    the utility supply during emergencies. We build an
    experimental prototype to demonstrate such power
    capping on a cluster of 8 servers, each with an
    individual battery, and implement several online
    heuristics in the context of different datacenter
    workloads to evaluate their effectiveness in handling
    power emergencies. We show that: (i) our battery-based
    solution can handle emergencies of short duration on
    its own, (ii) supplement existing reaction mechanisms
    to enhance their efficacy for longer emergencies, and
    (iii) battery even provide feasible options when other
    knobs do not suffice.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Kadav:2012:UMD,
  author = {Asim Kadav and Michael M. Swift},
  title = {Understanding modern device drivers},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {87--98},
  doi = {https://doi.org/10.1145/2189750.2150987},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Device drivers are the single largest contributor to
    operating-system kernel code with over 5 million lines
    of code in the Linux kernel, and cause significant
    complexity, bugs and development costs. Recent years
    have seen a flurry of research aimed at improving the
    reliability and simplifying the development of drivers.
    However, little is known about what constitutes this
    huge body of code beyond the small set of drivers used
    for research. In this paper, we study the source code
    of Linux drivers to understand what drivers actually
    do, how current research applies to them and what
    opportunities exist for future research. We determine
    whether assumptions made by most driver research, such
    as that all drivers belong to a class, are indeed true.
    We also analyze driver code and abstractions to
    determine whether drivers can benefit from code
    re-organization or hardware trends. We develop a set of
    static-analysis tools to analyze driver code across
    various axes. Broadly, our study looks at three aspects
    of driver code (i) what are the characteristics of
    driver code functionality and how applicable is driver
    research to all drivers, (ii) how do drivers interact
    with the kernel, devices, and buses, and (iii) are
    there similarities that can be abstracted into
    libraries to reduce driver size and complexity? We find
    that many assumptions made by driver research do not
    apply to all drivers. At least 44\% of drivers have
    code that is not captured by a class definition, 28\%
    of drivers support more than one device per driver, and
    15\% of drivers do significant computation over data.
    From the driver interactions study, we find USB bus
    offers an efficient bus interface with significant
    standardized code and coarse-grained access, ideal for
    executing drivers in isolation. We also find that
    drivers for different buses and classes have widely
    varying levels of device interaction, which indicates
    that the cost of isolation will vary by class. Finally,
    from our driver similarity study, we find 8\% of all
    driver code is substantially similar to code elsewhere
    and may be removed with new abstractions or
    libraries.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Panneerselvam:2012:COS,
  author = {Sankaralingam Panneerselvam and Michael M. Swift},
  title = {{Chameleon}: operating system support for dynamic
    processors},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {99--110},
  doi = {https://doi.org/10.1145/2189750.2150988},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {The rise of multi-core processors has shifted
    performance efforts towards parallel programs. However,
    single-threaded code, whether from legacy programs or
    ones difficult to parallelize, remains important.
    Proposed asymmetric multicore processors statically
    dedicate hardware to improve sequential performance,
    but at the cost of reduced parallel performance.
    However, several proposed mechanisms provide the
    best-of-both-worlds by combining multiple cores into a
    single, more powerful processor for sequential code.
    For example, Core Fusion merges multiple cores to pool
    caches and functional units, and Intel's Turbo Boost
    raises the clock speed of a core if the other cores on
    a chip are powered down. These reconfiguration
    mechanisms have two important properties. First the set
    of available cores and their capabilities can vary over
    short time scales. Current operating systems are not
    designed for rapidly changing hardware: the existing
    hotplug mechanisms for reconfiguring processors require
    global operations and hundreds of milliseconds to
    complete. Second, configurations may be mutually
    exclusive: using power to speed one core means it
    cannot be used to speed another. Current schedulers
    cannot manage this requirement. We present Chameleon,
    an extension to Linux to support dynamic processors
    that can reconfigure their cores at runtime. Chameleon
    provides processor proxies to enable rapid
    reconfiguration, execution objects to abstract the
    processing capabilities of physical CPUs, and a cluster
    scheduler to balance the needs of sequential and
    parallel programs. In experiments that emulate a
    dynamic processor, we find that Chameleon can
    reconfigure processors 100,000 times faster than Linux
    and allows applications full access to hardware
    capabilities: sequential code runs at full speed on a
    powerful execution context, while parallel code runs on
    as many cores as possible.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Hwang:2012:CRD,
  author = {Andy A. Hwang and Ioan A. Stefanovici and Bianca
    Schroeder},
  title = {Cosmic rays don't strike twice: understanding the
    nature of {DRAM} errors and the implications for system
    design},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {111--122},
  doi = {https://doi.org/10.1145/2189750.2150989},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Main memory is one of the leading hardware causes for
    machine crashes in today's datacenters. Designing,
    evaluating and modeling systems that are resilient
    against memory errors requires a good understanding of
    the underlying characteristics of errors in DRAM in the
    field. While there have recently been a few first
    studies on DRAM errors in production systems, these
    have been too limited in either the size of the data
    set or the granularity of the data to conclusively
    answer many of the open questions on DRAM errors. Such
    questions include, for example, the prevalence of soft
    errors compared to hard errors, or the analysis of
    typical patterns of hard errors. In this paper, we
    study data on DRAM errors collected on a diverse range
    of production systems in total covering nearly 300
    terabyte-years of main memory. As a first contribution,
    we provide a detailed analytical study of DRAM error
    characteristics, including both hard and soft errors.
    We find that a large fraction of DRAM errors in the
    field can be attributed to hard errors and we provide a
    detailed analytical study of their characteristics. As
    a second contribution, the paper uses the results from
    the measurement study to identify a number of promising
    directions for designing more resilient systems and
    evaluates the potential of different protection
    mechanisms in the light of realistic error patterns.
    One of our findings is that simple page retirement
    policies might be able to mask a large number of DRAM
    errors in production systems, while sacrificing only a
    negligible fraction of the total DRAM in the system.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Hari:2012:REA,
  author = {Siva Kumar Sastry Hari and Sarita V. Adve and Helia
    Naeimi and Pradeep Ramachandran},
  title = {{Relyzer}: exploiting application-level fault
    equivalence to analyze application resiliency to
    transient faults},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {123--134},
  doi = {https://doi.org/10.1145/2189750.2150990},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Future microprocessors need low-cost solutions for
    reliable operation in the presence of failure-prone
    devices. A promising approach is to detect hardware
    faults by deploying low-cost monitors of software-level
    symptoms of such faults. Recently, researchers have
    shown these mechanisms work well, but there remains a
    non-negligible risk that several faults may escape the
    symptom detectors and result in silent data corruptions
    (SDCs). Most prior evaluations of symptom-based
    detectors perform fault injection campaigns on
    application benchmarks, where each run simulates the
    impact of a fault injected at a hardware site at a
    certain point in the application's execution
    (application fault site). Since the total number of
    application fault sites is very large (trillions for
    standard benchmark suites), it is not feasible to study
    all possible faults. Previous work therefore typically
    studies a randomly selected sample of faults. Such
    studies do not provide any feedback on the portions of
    the application where faults were not injected. Some of
    those instructions may be vulnerable to SDCs, and
    identifying them could allow protecting them through
    other means if needed. This paper presents Relyzer, an
    approach that systematically analyzes all application
    fault sites and carefully picks a small subset to
    perform selective fault injections for transient
    faults. Relyzer employs novel fault pruning techniques
    that prune faults that need detailed study by either
    predicting their outcomes or showing them equivalent to
    other faults. We find that Relyzer prunes about 99.78\%
    of the total faults across twelve applications studied
    here, reducing the faults that require detailed
    simulation by 3 to 5 orders of magnitude for most of
    the applications. Fault injection simulations on the
    remaining faults can identify SDC causing faults in the
    entire application. Some of Relyzer's techniques rely
    on heuristics to determine fault equivalence. Our
    validation efforts show that Relyzer determines fault
    outcomes with 96\% accuracy, averaged across all the
    applications studied here.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Feiner:2012:CKI,
  author = {Peter Feiner and Angela Demke Brown and Ashvin Goel},
  title = {Comprehensive kernel instrumentation via dynamic
    binary translation},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {135--146},
  doi = {https://doi.org/10.1145/2189750.2150992},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Dynamic binary translation (DBT) is a powerful
    technique that enables fine-grained monitoring and
    manipulation of an existing program binary. At the user
    level, it has been employed extensively to develop
    various analysis, bug-finding, and security tools. Such
    tools are currently not available for operating system
    (OS) binaries since no comprehensive DBT framework
    exists for the OS kernel. To address this problem, we
    have developed a DBT framework that runs as a Linux
    kernel module, based on the user-level DynamoRIO
    framework. Our approach is unique in that it controls
    all kernel execution, including interrupt and exception
    handlers and device drivers, enabling comprehensive
    instrumentation of the OS without imposing any overhead
    on user-level code. In this paper, we discuss the key
    challenges in designing and building an in-kernel DBT
    framework and how the design differs from user-space.
    We use our framework to build several sample
    instrumentations, including simple instruction counting
    as well as an implementation of shadow memory for the
    kernel. Using the shadow memory, we build a kernel
    stack overflow protection tool and a memory
    addressability checking tool. Qualitatively, the system
    is fast enough and stable enough to run the normal
    desktop workload of one of the authors for several
    weeks.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Odaira:2012:COA,
  author = {Rei Odaira and Toshio Nakatani},
  title = {Continuous object access profiling and optimizations
    to overcome the memory wall and bloat},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {147--158},
  doi = {https://doi.org/10.1145/2189750.2150993},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Future microprocessors will have more serious memory
    wall problems since they will include more cores and
    threads in each chip. Similarly, future applications
    will have more serious memory bloat problems since they
    are more often written using object-oriented languages
    and reusable frameworks. To overcome such problems, the
    language runtime environments must accurately and
    efficiently profile how programs access objects. We
    propose Barrier Profiler, a low-overhead object access
    profiler using a memory-protection-based approach
    called pointer barrierization and adaptive overhead
    reduction techniques. Unlike previous
    memory-protection-based techniques, pointer
    barrierization offers per-object protection by
    converting all of the pointers to a given object to
    corresponding barrier pointers that point to protected
    pages. Barrier Profiler achieves low overhead by not
    causing signals at object accesses that are unrelated
    to the needed profiles, based on profile feedback and a
    compiler analysis. Our experimental results showed
    Barrier Profiler provided sufficiently accurate
    profiles with 1.3\% on average and at most 3.4\%
    performance overhead for allocation-intensive
    benchmarks, while previous code-instrumentation-based
    techniques suffered from 9.2\% on average and at most
    12.6\% overhead. The low overhead allows Barrier
    Profiler to be run continuously on production systems.
    Using Barrier Profiler, we implemented two new online
    optimizations to compress write-only character arrays
    and to adjust the initial sizes of mostly non-accessed
    arrays. They resulted in speed-ups of up to 8.6\% and
    36\%, respectively.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Greathouse:2012:CUW,
  author = {Joseph L. Greathouse and Hongyi Xin and Yixin Luo and
    Todd Austin},
  title = {A case for unlimited watchpoints},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {159--172},
  doi = {https://doi.org/10.1145/2189750.2150994},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Numerous tools have been proposed to help developers
    fix software errors and inefficiencies. Widely-used
    techniques such as memory checking suffer from
    overheads that limit their use to pre-deployment
    testing, while more advanced systems have such severe
    performance impacts that they may require
    special-purpose hardware. Previous works have described
    hardware that can accelerate individual analyses, but
    such specialization stymies adoption; generalized
    mechanisms are more likely to be added to commercial
    processors. This paper demonstrates that the ability to
    set an unlimited number of fine-grain data watchpoints
    can reduce the runtime overheads of numerous dynamic
    software analysis techniques. We detail the watchpoint
    capabilities required to accelerate these analyses
    while remaining general enough to be useful in the
    future. We describe a hardware design that stores
    watchpoints in main memory and utilizes two different
    on-chip caches to accelerate performance. The first is
    a bitmap lookaside buffer that stores fine-grained
    watchpoints, while the second is a range cache that can
    efficiently hold large contiguous regions of
    watchpoints. As an example of the power of such a
    system, it is possible to use watchpoints to accelerate
    read/write set checks in a software data race detector
    by nearly 9$ \times $.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Olszewski:2012:AAS,
  author = {Marek Olszewski and Qin Zhao and David Koh and Jason
    Ansel and Saman Amarasinghe},
  title = {{Aikido}: accelerating shared data dynamic analyses},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {173--184},
  doi = {https://doi.org/10.1145/2189750.2150995},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Despite a burgeoning demand for parallel programs, the
    tools available to developers working on shared-memory
    multicore processors have lagged behind. One reason for
    this is the lack of hardware support for inspecting the
    complex behavior of these parallel programs.
    Inter-thread communication, which must be instrumented
    for many types of analyses, may occur with any memory
    operation. To detect such thread communication in
    software, many existing tools require the
    instrumentation of all memory operations, which leads
    to significant performance overheads. To reduce this
    overhead, some existing tools resort to random sampling
    of memory operations, which introduces false negatives.
    Unfortunately, neither of these approaches provide the
    speed and accuracy programmers have traditionally
    expected from their tools. In this work, we present
    Aikido, a new system and framework that enables the
    development of efficient and transparent analyses that
    operate on shared data. Aikido uses a hybrid of
    existing hardware features and dynamic binary rewriting
    to detect thread communication with low overhead.
    Aikido runs a custom hypervisor below the operating
    system, which exposes per-thread hardware protection
    mechanisms not available in any widely used operating
    system. This hybrid approach allows us to benefit from
    the low cost of detecting memory accesses with
    hardware, while maintaining the word-level accuracy of
    a software-only approach. To evaluate our framework, we
    have implemented an Aikido-enabled vector clock race
    detector. Our results show that the Aikido enabled
    race-detector outperforms existing techniques that
    provide similar accuracy by up to 6.0x, and 76\% on
    average, on the PARSEC benchmark suite.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@article{Kasikci:2012:DRV,
  author = {Baris Kasikci and Cristian Zamfir and George Candea},
  title = {Data races vs. data race bugs: telling the difference
    with {Portend}},
  journal = j-COMP-ARCH-NEWS,
  year = {2012},
  month = mar,
  volume = {40},
  number = {1},
  pages = {185--198},
  doi = {https://doi.org/10.1145/2189750.2150997},
  note = {ASPLOS '12 conference proceedings.},
  abstract = {Even though most data races are harmless, the harmful
    ones are at the heart of some of the worst concurrency
    bugs. Alas, spotting just the harmful data races in
    programs is like finding a needle in a haystack:
    76\%--90\% of the true data races reported by
    state-of-the-art race detectors turn out to be harmless
    [45]. We present Portend, a tool that not only detects
    races but also automatically classifies them based on
    their potential consequences: Could they lead to
    crashes or hangs? Could their effects be visible
    outside the program? Are they harmless? Our proposed
    technique achieves high accuracy by efficiently
    analyzing multiple paths and multiple thread schedules
    in combination, and by performing symbolic comparison
    between program outputs. We ran Portend on 7 real-world
    applications: it detected 93 true data races and
    correctly classified 92 of them, with no human effort.
    6 of them are harmful races. Portend's classification
    accuracy is up to 88\% higher than that of existing
    tools, and it produces easy-to-understand evidence of
    the consequences of harmful races, thus both proving
    their harmfulness and making debugging easier. We
    envision Portend being used for testing and debugging,
    as well as for automatically triaging bug reports.},
  bibdate = {Fri Jun 1 17:06:46 MDT 2012},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@Article{Clements:2012:SAS,
author = "Austin T. Clements and M. Frans Kaashoek and Nickolai
Zeldovich",
title = "Scalable address spaces using {RCU} balanced trees",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "199--210",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2150998",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Software developers commonly exploit multicore
processors by building multithreaded software in which
all threads of an application share a single address
space. This shared address space has a cost: kernel
virtual memory operations such as handling soft page
faults, growing the address space, mapping files, etc.
can limit the scalability of these applications. In
widely-used operating systems, all of these operations
are synchronized by a single per-process lock. This
paper contributes a new design for increasing the
concurrency of kernel operations on a shared address
space by exploiting read-copy-update (RCU) so that soft
page faults can both run in parallel with operations
that mutate the same address space and avoid contending
with other page faults on shared cache lines. To enable
such parallelism, this paper also introduces an
RCU-based binary balanced tree for storing memory
mappings. An experimental evaluation using three
multithreaded applications shows performance
improvements on 80 cores ranging from 1.7x to 3.4x for
an implementation of this design in the Linux 2.6.37
kernel. The RCU-based binary tree enables soft page
faults to run at a constant cost with an increasing
number of cores, suggesting that the design will scale
well beyond 80 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Volos:2012:ATM,
author = "Haris Volos and Andres Jaan Tack and Michael M. Swift
and Shan Lu",
title = "Applying transactional memory to concurrency bugs",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "211--222",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2150999",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Multithreaded programs often suffer from
synchronization bugs such as atomicity violations and
deadlocks. These bugs arise from complicated locking
strategies and ad hoc synchronization methods to avoid
the use of locks. A survey of the bug databases of
major open-source applications shows that concurrency
bugs often take multiple fix attempts, and that fixes
often introduce yet more concurrency bugs.
Transactional memory (TM) enables programmers to
declare regions of code atomic without specifying a
lock and has the potential to avoid these bugs. Where
most previous studies have focused on using TM to write
new programs from scratch, we consider its utility in
fixing existing programs with concurrency bugs. We
therefore investigate four methods of using TM on three
concurrent programs. Overall, we find that 29\% of the
bugs are not fixable by transactional memory, showing
that TM does not address many important types of
concurrency bugs. In particular, TM works poorly with
extremely long critical sections and with deadlocks
involving both condition variables and I/O. Conversely,
we find that for 56\% of the bugs, transactional memory
offers demonstrable value by simplifying the reasoning
behind a fix or the effort to implement a fix, and
using transactions in the first place would have
avoided 71\% of the bugs examined. We also find that ad
hoc synchronization put in place to avoid the overhead
of locking can be greatly simplified with TM, but
requires hardware support to perform well.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Joao:2012:BIS,
author = "Jos{\'e} A. Joao and M. Aater Suleman and Onur Mutlu
and Yale N. Patt",
title = "Bottleneck identification and scheduling in
multithreaded applications",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "223--234",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151001",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Performance of multithreaded applications is limited
by a variety of bottlenecks, e.g. critical sections,
barriers and slow pipeline stages. These bottlenecks
serialize execution, waste valuable execution cycles,
and limit scalability of applications. This paper
proposes Bottleneck Identification and Scheduling in
Multithreaded Applications (BIS), a cooperative
software-hardware mechanism to identify and accelerate
the most critical bottlenecks. BIS identifies which
bottlenecks are likely to reduce performance by
measuring the number of cycles threads have to wait for
each bottleneck, and accelerates those bottlenecks
using one or more fast cores on an Asymmetric Chip
Multi-Processor (ACMP). Unlike previous work that
targets specific bottlenecks, BIS can identify and
accelerate bottlenecks regardless of their type. We
compare BIS to four previous approaches and show that
it outperforms the best of them by 15\% on average.
BIS' performance improvement increases as the number of
cores and the number of fast cores in the system
increase.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Radojkovic:2012:OTA,
author = "Petar Radojkovi{\'c} and Vladimir Cakarevi{\'c} and
Miquel Moret{\'o} and Javier Verd{\'u} and Alex Pajuelo
and Francisco J. Cazorla and Mario Nemirovsky and Mateo
Valero",
title = "Optimal task assignment in multithreaded processors: a
statistical approach",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "235--248",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151002",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "The introduction of massively multithreaded (MMT)
processors, comprised of a large number of cores with
many shared resources, has made task scheduling, in
particular task to hardware thread assignment, one of
the most promising ways to improve system performance.
However, finding an optimal task assignment for a
workload running on MMT processors is an NP-complete
problem. Due to the fact that the performance of the
best possible task assignment is unknown, the room for
improvement of current task-assignment algorithms
cannot be determined. This is a major problem for the
industry because it could lead to: (1)~A waste of
resources if excessive effort is devoted to improving a
task assignment algorithm that already provides a
performance that is close to the optimal one, or
(2)~significant performance loss if insufficient effort
is devoted to improving poorly-performing task
assignment algorithms. In this paper, we present a
method based on Extreme Value Theory that allows the
prediction of the performance of the optimal task
assignment in MMT processors. We further show that
executing a sample of several hundred or several
thousand random task assignments is enough to obtain,
with very high confidence, an assignment with a
performance that is close to the optimal one. We
validate our method with an industrial case study for a
set of multithreaded network applications running on an
UltraSPARC~T2 processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jaleel:2012:CCR,
author = "Aamer Jaleel and Hashem H. Najaf-abadi and Samantika
Subramaniam and Simon C. Steely and Joel Emer",
title = "{CRUISE}: cache replacement and utility-aware
scheduling",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "249--260",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151003",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "When several applications are co-scheduled to run on a
system with multiple shared LLCs, there is opportunity
to improve system performance. This opportunity can be
exploited by the hardware, software, or a combination
of both hardware and software. The software, i.e., an
operating system or hypervisor, can improve system
performance by co-scheduling jobs on LLCs to minimize
shared cache contention. The hardware can improve
system throughput through better replacement policies
by allocating more cache resources to applications that
benefit from the cache and less to those applications
that do not. This study presents a detailed analysis on
the interactions between intelligent scheduling and
smart cache replacement policies. We find that smart
cache replacement reduces the burden on software to
provide intelligent scheduling decisions. However,
under smart cache replacement, there is still room to
improve performance from better application
co-scheduling. We find that co-scheduling decisions are
a function of the underlying LLC replacement policy. We
propose Cache Replacement and Utility-aware Scheduling
(CRUISE)---a hardware/software co-designed approach for
shared cache management. For 4-core and 8-core CMPs, we
find that CRUISE approaches the performance of an ideal
job co-scheduling policy under different LLC
replacement policies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{DeVuyst:2012:EMH,
author = "Matthew DeVuyst and Ashish Venkat and Dean M.
Tullsen",
title = "Execution migration in a heterogeneous-{ISA} chip
multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "261--272",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151004",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Prior research has shown that single-ISA heterogeneous
chip multiprocessors have the potential for greater
performance and energy efficiency than homogeneous
CMPs. However, restricting the cores to a single ISA
removes an important opportunity for greater
heterogeneity. To take full advantage of a
heterogeneous-ISA CMP, however, we must be able to
migrate execution among heterogeneous cores in order to
adapt to program phase changes and changing external
conditions (e.g., system power state). This paper
explores migration on heterogeneous-ISA CMPs. This is
non-trivial because program state is kept in an
architecture-specific form; therefore, state
transformation is necessary for migration. To keep
migration cost low, the amount of state that requires
transformation must be minimized. This work identifies
large portions of program state whose form is not
critical for performance; the compiler is modified to
produce programs that keep most of their state in an
architecture-neutral form so that only a small number
of data items must be repositioned and no pointers need
to be changed. The result is low migration cost with
minimal sacrifice of non-migration performance.
Additionally, this work leverages binary translation to
enable instantaneous migration. When migration is
requested, the program is immediately migrated to a
different core where binary translation runs for a
short time until a function call is reached, at which
point program state is transformed and execution
continues natively on the new core. This system can
tolerate migrations as often as every 100 ms and still
retain 95\% of the performance of a system that does
not do, or support, migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:2012:ESC,
author = "Changhui Lin and Vijay Nagarajan and Rajiv Gupta and
Bharghava Rajaram",
title = "Efficient sequential consistency via conflict
ordering",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "273--286",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151006",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Although the sequential consistency (SC) model is the
most intuitive, processor designers often choose to
support relaxed memory consistency models for higher
performance. This is because SC implementations that
match the performance of relaxed memory models require
post-retirement speculation and its associated hardware
costs. In this paper we propose an efficient approach
for enforcing SC without requiring post-retirement
speculation. While prior SC implementations guarantee
SC by explicitly completing memory operations within a
processor in program order, we guarantee SC by
completing conflicting memory operations, within and
across processors, in an order that is consistent with
the program order. More specifically, we identify those
conflicting memory operations whose ordering is
critical for the maintenance of SC and explicitly order
them. This allows us to safely (non-speculatively)
complete memory operations past pending writes, thus
reducing memory ordering stalls. Our experiments with
SPLASH-2 programs show that SC can be achieved
efficiently, with performance comparable to RMO
(relaxed memory order).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cheriton:2012:HAS,
author = "David Cheriton and Amin Firoozshahian and Alex
Solomatnikov and John P. Stevenson and Omid Azizi",
title = "{HICAMP}: architectural support for efficient
concurrency-safe shared structured data access",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "287--300",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151007",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Programming language and operating system support for
efficient concurrency-safe access to shared data is a
key concern for the effective use of multi-core
processors. Most research has focused on the software
model of multiple threads accessing this data within a
single shared address space. However, many real
applications are actually structured as multiple
separate processes for fault isolation and simplified
synchronization. In this paper, we describe the HICAMP
architecture and its innovative memory system, which
supports efficient concurrency safe access to
structured shared data without incurring the overhead
of inter-process communication. The HICAMP architecture
also provides support for programming language and OS
structures such as threads, iterators, read-only access
and atomic update. In addition to demonstrating that
HICAMP is beneficial for multi-process structured
applications, our evaluation shows that the same
mechanisms provide substantial benefits for other
areas, including sparse matrix computations and
virtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Esmaeilzadeh:2012:ASD,
author = "Hadi Esmaeilzadeh and Adrian Sampson and Luis Ceze and
Doug Burger",
title = "Architecture support for disciplined approximate
programming",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "301--312",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151008",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Disciplined approximate programming lets programmers
declare which parts of a program can be computed
approximately and consequently at a lower energy cost.
The compiler proves statically that all approximate
computation is properly isolated from precise
computation. The hardware is then free to selectively
apply approximate storage and approximate computation
with no need to perform dynamic correctness checks. In
this paper, we propose an efficient mapping of
disciplined approximate programming onto hardware. We
describe an ISA extension that provides approximate
operations and storage, which give the hardware freedom
to save energy at the cost of accuracy. We then propose
Truffle, a microarchitecture design that efficiently
supports the ISA extensions. The basis of our design is
dual-voltage operation, with a high voltage for precise
operations and a low voltage for approximate
operations. The key aspect of the microarchitecture is
its dependence on the instruction stream to determine
when to use the low voltage. We evaluate the power
savings potential of in-order and out-of-order Truffle
configurations and explore the resulting quality of
service degradation. We evaluate several applications
and demonstrate energy savings up to 43\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Meisner:2012:DAS,
author = "David Meisner and Thomas F. Wenisch",
title = "{DreamWeaver}: architectural support for deep sleep",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "313--324",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151009",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Numerous data center services exhibit low average
utilization leading to poor energy efficiency. Although
CPU voltage and frequency scaling historically has been
an effective means to scale down power with
utilization, transistor scaling trends are limiting its
effectiveness and the CPU is accounting for a shrinking
fraction of system power. Recent research advocates the
use of full-system idle low-power modes to combat
energy losses, as such modes provide the deepest power
savings with bounded response time impact. However, the
trend towards increasing cores per die is undermining
the effectiveness of these sleep modes, particularly
for request-parallel data center applications, because
the independent idle periods across individual cores
are unlikely to align by happenstance. We propose
DreamWeaver, architectural support to facilitate deep
sleep for request-parallel applications on multicore
servers. DreamWeaver comprises two elements: Weave
Scheduling, a scheduling policy to coalesce idle and
busy periods across cores to create opportunities for
system-wide deep sleep; and the Dream Processor, a
light-weight co-processor that monitors incoming
network traffic and suspended work during sleep to
determine when the system must wake. DreamWeaver is
based on two key concepts: (1) stall execution and
sleep anytime any core is unoccupied, but (2) constrain
the maximum time any request may be stalled. Unlike
prior scheduling approaches, DreamWeaver will preempt
execution to sleep, maximizing time spent at the
systems' most efficient operating point. We demonstrate
that DreamWeaver can smoothly trade-off bounded,
predictable increases in 99th-percentile response time
for increasing power savings, and strictly dominates
the savings available with voltage and frequency
scaling and timeout-based request batching schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{King:2012:AGH,
author = "Myron King and Nirav Dave and Arvind",
title = "Automatic generation of hardware\slash software
interfaces",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "325--336",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151011",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Enabling new applications for mobile devices often
requires the use of specialized hardware to reduce
power consumption. Because of time-to-market pressure,
current design methodologies for embedded applications
require an early partitioning of the design, allowing
the hardware and software to be developed
simultaneously, each adhering to a rigid interface
contract. This approach is problematic for two reasons:
(1) a detailed hardware-software interface is difficult
to specify until one is deep into the design process,
and (2) it prevents the later migration of
functionality across the interface motivated by
efficiency concerns or the addition of features. We
address this problem using the Bluespec Codesign
Language~(BCL) which permits the designer to specify
the hardware-software partition in the source code,
allowing the compiler to synthesize efficient software
and hardware along with transactors for communication
between the partitions. The movement of functionality
across the hardware-software boundary is accomplished
by simply specifying a new partitioning, and since the
compiler automatically generates the desired interface
specifications, it eliminates yet another error-prone
design task. In this paper we present BCL, an extension
of a commercially available hardware design language
(Bluespec SystemVerilog), a new software compiling
scheme, and preliminary results generated using our
compiler for various hardware-software decompositions
of an Ogg Vorbis audio decoder, and a ray-tracing
application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martignoni:2012:PEL,
author = "Lorenzo Martignoni and Stephen McCamant and Pongsin
Poosankam and Dawn Song and Petros Maniatis",
title = "Path-exploration lifting: hi-fi tests for lo-fi
emulators",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "337--348",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151012",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Processor emulators are widely used to provide
isolation and instrumentation of binary software.
However they have proved difficult to implement
correctly: processor specifications have many corner
cases that are not exercised by common workloads. It is
untenable to base other system security properties on
the correctness of emulators that have received only
ad-hoc testing. To obtain emulators that are worthy of
the required trust, we propose a technique to explore a
high-fidelity emulator with symbolic execution, and
then lift those test cases to test a lower-fidelity
emulator. The high-fidelity emulator serves as a proxy
for the hardware specification, but we can also further
validate by running the tests on real hardware. We
implement our approach and apply it to generate about
610,000 test cases; for about 95\% of the instructions
we achieve complete path coverage. The tests reveal
thousands of individual differences; we analyze those
differences to shed light on a number of root causes,
such as atomicity violations and missing security
features.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hong:2012:GMD,
author = "Sungpack Hong and Hassan Chafi and Eric Sedlar and
Kunle Olukotun",
title = "{Green-Marl}: a {DSL} for easy and efficient graph
analysis",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "349--362",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151013",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "The increasing importance of graph-data based
applications is fueling the need for highly efficient
and parallel implementations of graph analysis
software. In this paper we describe Green-Marl, a
domain-specific language (DSL) whose high level
language constructs allow developers to describe their
graph analysis algorithms intuitively, but expose the
data-level parallelism inherent in the algorithms. We
also present our Green-Marl compiler which translates
high-level algorithmic description written in
Green-Marl into an efficient C++ implementation by
exploiting this exposed data-level parallelism.
Furthermore, our Green-Marl compiler applies a set of
optimizations that take advantage of the high-level
semantic knowledge encoded in the Green-Marl DSL. We
demonstrate that graph analysis algorithms can be
written very intuitively with Green-Marl through some
examples, and our experimental results show that the
compiler-generated implementation out of such
descriptions performs as well as or better than
highly-tuned hand-coded implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Park:2012:SDE,
author = "Yongjun Park and Sangwon Seo and Hyunchul Park and
Hyoun Kyu Cho and Scott Mahlke",
title = "{SIMD} defragmenter: efficient {ILP} realization on
data-parallel architectures",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "363--374",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151014",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Single-instruction multiple-data (SIMD) accelerators
provide an energy-efficient platform to scale the
performance of mobile systems while still retaining
post-programmability. The central challenge is
translating the parallel resources of the SIMD hardware
into real application performance. In scientific
applications, automatic vectorization techniques have
proven quite effective at extracting large levels of
data-level parallelism (DLP). However, vectorization is
often much less effective for media applications due to
low trip count loops, complex control flow, and
non-uniform execution behavior. As a result, SIMD lanes
remain idle due to insufficient DLP. To attack this
problem, this paper proposes a new vectorization pass
called SIMD Defragmenter to uncover hidden DLP that
lurks below the surface in the form of
instruction-level parallelism (ILP). The difficulty is
managing the data packing/unpacking overhead that can
easily exceed the benefits gained through SIMD
execution. The SIMD defragmenter overcomes this problem
by identifying groups of compatible instructions
(subgraphs) that can be executed in parallel across the
SIMD lanes. By SIMDizing in bulk at the subgraph level,
packing/unpacking overhead is minimized. On a 16-lane
SIMD processor, experimental results show that SIMD
defragmentation achieves a mean 1.6x speedup over
traditional loop vectorization and a 31\% gain over
prior research approaches for converting ILP to DLP.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Simha:2012:UAS,
author = "Dilip Nijagal Simha and Maohua Lu and Tzi-cker
Chiueh",
title = "An update-aware storage system for low-locality
update-intensive workloads",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "375--386",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151016",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Traditional storage systems provide a simple
read/write interface, which is inadequate for
low-locality update-intensive workloads because it
limits the disk scheduling flexibility and results in
inefficient use of buffer memory and raw disk
bandwidth. This paper describes an update-aware disk
access interface that allows applications to explicitly
specify disk update requests and associate with such
requests call-back functions that will be invoked when
the requested disk blocks are brought into memory.
Because call-back functions offer a continuation
mechanism after retrieval of requested blocks, storage
systems supporting this interface are given more
flexibility in scheduling pending disk update requests.
In particular, this interface enables a simple but
effective technique called Batching mOdifications with
Sequential Commit (BOSC), which greatly improves the
sustained throughput of a storage system under
low-locality update-intensive workloads. In addition,
together with a space-efficient low-latency disk
logging technique, BOSC is able to deliver the same
durability guarantee as synchronous disk updates.
Empirical measurements show that the random update
throughput of a BOSC-based B+ tree is more than an
order of magnitude higher than that of the same B+ tree
implementation on a traditional storage system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Caulfield:2012:PSU,
author = "Adrian M. Caulfield and Todor I. Mollov and Louis Alex
Eisner and Arup De and Joel Coburn and Steven Swanson",
title = "Providing safe, user space access to fast, solid state
disks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "387--400",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151017",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Emerging fast, non-volatile memories (e.g., phase
change memories, spin-torque MRAMs, and the memristor)
reduce storage access latencies by an order of
magnitude compared to state-of-the-art flash-based
SSDs. This improved performance means that software
overheads that had little impact on the performance of
flash-based systems can present serious bottlenecks in
systems that incorporate these new technologies. We
describe a novel storage hardware and software
architecture that nearly eliminates two sources of this
overhead: Entering the kernel and performing file
system permission checks. The new architecture provides
a private, virtualized interface for each process and
moves file system protection checks into hardware. As a
result, applications can access file data without
operating system intervention, eliminating OS and file
system costs entirely for most accesses. We describe
the support the system provides for fast permission
checks in hardware, our approach to notifying
applications when requests complete, and the small,
easily portable changes required in the file system to
support the new access model. Existing applications
require no modification to use the new interface. We
evaluate the performance of the system using a suite of
microbenchmarks and database workloads and show that
the new interface improves latency and bandwidth for 4
KB writes by 60\% and 7.2x, respectively, OLTP database
transaction throughput by up to 2.0x, and Berkeley-DB
throughput by up to 5.7x. A streamlined asynchronous
file IO interface built to fully utilize the new
interface enables an additional 5.5x increase in
throughput with 1 thread and 2.8x increase in
efficiency for 512 B transfers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Narayanan:2012:WSP,
author = "Dushyanth Narayanan and Orion Hodson",
title = "Whole-system persistence",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "401--410",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151018",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Today's databases and key-value stores commonly keep
all their data in main memory. A single server can have
over 100 GB of memory, and a cluster of such servers
can have 10s to 100s of TB. However, a storage back end
is still required for recovery from failures. Recovery
can last for minutes for a single server or hours for a
whole cluster, causing heavy load on the back end.
Non-volatile main memory (NVRAM) technologies can help
by allowing near-instantaneous recovery of in-memory
state. However, today's software does not support this
well. Block-based approaches such as persistent buffer
caches suffer from data duplication and block transfer
overheads. Recently, user-level persistent heaps have
been shown to have much better performance than these.
However they require substantial application
modification and still have significant runtime
overheads. This paper proposes whole-system persistence
(WSP) as an alternative. WSP is aimed at systems where
all memory is non-volatile. It transparently recovers
an application's entire state, making a failure appear
as a suspend/resume event. Runtime overheads are
eliminated by using ``flush on fail'': transient state
in processor registers and caches is flushed to NVRAM
only on failure, using the residual energy from the
system power supply. Our evaluation shows that this
approach has 1.6--13 times better runtime performance
than a persistent heap, and that flush-on-fail can
complete safely within 2--35\% of the residual energy
window provided by standard power supplies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gordon:2012:EBM,
author = "Abel Gordon and Nadav Amit and Nadav Har'El and Muli
Ben-Yehuda and Alex Landau and Assaf Schuster and Dan
Tsafrir",
title = "{ELI}: bare-metal performance for {I/O}
virtualization",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "411--422",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151020",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Direct device assignment enhances the performance of
guest virtual machines by allowing them to communicate
with I/O devices without host involvement. But even
with device assignment, guests are still unable to
approach bare-metal performance, because the host
intercepts all interrupts, including those interrupts
generated by assigned devices to signal to guests the
completion of their I/O requests. The host involvement
induces multiple unwarranted guest/host context
switches, which significantly hamper the performance of
I/O intensive workloads. To solve this problem, we
present ELI (ExitLess Interrupts), a software-only
approach for handling interrupts within guest virtual
machines directly and securely. By removing the host
from the interrupt handling path, ELI manages to
improve the throughput and latency of unmodified,
untrusted guests by 1.3x--1.6x, allowing them to reach
97\%--100\% of bare-metal performance even for the most
demanding I/O-intensive workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vasic:2012:DAR,
author = "Nedeljko Vasi{\'c} and Dejan Novakovi{\'c} and
Svetozar Miucin and Dejan Kosti{\'c} and Ricardo
Bianchini",
title = "{DejaVu}: accelerating resource allocation in
virtualized environments",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "423--436",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151021",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Effective resource management of virtualized
environments is a challenging task. State-of-the-art
management systems either rely on analytical models or
evaluate resource allocations by running actual
experiments. However, both approaches incur a
significant overhead once the workload changes. The
former needs to re-calibrate and re-validate models,
whereas the latter has to run a new set of experiments
to select a new resource allocation. During the
adaptation period, the system may run with an
inefficient configuration. In this paper, we propose
DejaVu --- a framework that (1) minimizes the resource
management overhead by identifying a small set of
workload classes for which it needs to evaluate
resource allocation decisions, (2) quickly adapts to
workload changes by classifying workloads using
signatures and caching their preferred resource
allocations at runtime, and (3) deals with interference
by estimating an ``interference index''. We evaluate
DejaVu by running representative network services on
Amazon EC2. DejaVu achieves more than 10x speedup in
adaptation time for each workload change relative to
the state-of-the-art. By enabling quick adaptation,
DejaVu saves up to 60\% of the service provisioning
cost. Finally, DejaVu is easily deployable as it does
not require any extensive instrumentation or human
intervention.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Szefer:2012:ASH,
author = "Jakub Szefer and Ruby B. Lee",
title = "Architectural support for hypervisor-secure
virtualization",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "437--450",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151022",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "Virtualization has become a standard part of many
computer systems. A key part of virtualization is the
all-powerful hypervisor which manages the physical
platform and can access all of its resources, including
memory assigned to the guest virtual machines (VMs).
Continuing releases of bug reports and exploits in the
virtualization software show that defending the
hypervisor against attacks is very difficult. In this
work, we present hypervisor-secure virtualization --- a
new research direction with the goal of protecting the
guest VMs from an untrusted hypervisor. We also present
the HyperWall architecture which achieves
hypervisor-secure virtualization, using hardware to
provide the protections. HyperWall allows a hypervisor
to freely manage the memory, processor cores and other
resources of a platform. Yet once VMs are created, our
new Confidentiality and Integrity Protection (CIP)
tables protect the memory of the guest VMs from
accesses by the hypervisor or by DMA, depending on the
customer's specification. If a hypervisor does become
compromised, e.g. by an attack from a malicious VM, it
cannot be used in turn to attack other VMs. The
protections are enabled through minimal modifications
to the microprocessor and memory management units.
Whereas much of the previous work concentrates on
protecting the hypervisor from attacks by guest VMs, we
tackle the problem of protecting the guest VMs from the
hypervisor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:2012:RSE,
author = "Min Lee and Karsten Schwan",
title = "Region scheduling: efficiently using the cache
architectures via page-level affinity",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "1",
pages = "451--462",
month = mar,
year = "2012",
DOI = "https://doi.org/10.1145/2189750.2151023",
bibdate = "Fri Jun 1 17:06:46 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ASPLOS '12 conference proceedings.",
abstract = "The performance of modern many-core platforms strongly
depends on the effectiveness of using their complex
cache and memory structures. This indicates the need
for a memory-centric approach to platform scheduling,
in which it is the locations of memory blocks in caches
rather than CPU idleness that determines where
application processes are run. Using the term `memory
region' to denote the current set of physical memory
pages actively used by an application, this paper
presents and evaluates region-based scheduling methods
for multicore platforms. This involves (i) continuously
and at runtime identifying the memory regions used by
executable entities, and their sizes, (ii) mapping
these regions to caches to match performance goals, and
(iii) maintaining region to cache mappings by ensuring
that entities run on processors with direct access to
the caches containing their regions. Region scheduling
can implement policies that (i) offer improved
performance to applications by `unifying' the multiple
caches present on the underlying physical machine
and/or by `balancing' cache usage to take maximum
advantage of available cache space, (ii) better isolate
applications from each other, particularly when their
performance is strongly affected by cache availability,
and also (iii) take advantage of standard scheduling
and CPU-based load balancing when regioning is
ineffective. The paper describes region scheduling and
its system-level implementation and evaluates its
performance with micro-benchmarks and representative
multi-core applications. Single applications see
performance improvements of up to 15\% with region
scheduling, and we observe 40\% latency improvements
when a platform is shared by multiple applications.
Superior isolation is shown to be particularly
important for cache-sensitive or real-time codes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Juurlink:2012:ALP,
author = "B. H. H. Juurlink and C. H. Meenderinck",
title = "{Amdahl}'s law for predicting the future of multicores
considered harmful",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "1--9",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234338",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Several recent works predict the future of multicore
systems or identify scalability bottlenecks based on
Amdahl's law. Amdahl's law implicitly assumes, however,
that the problem size stays constant, but in most cases
more cores are used to solve larger and more complex
problems. There is a related law known as Gustafson's
law which assumes that runtime, not the problem size,
is constant. In other words, it is assumed that the
runtime on p cores is the same as the runtime on 1 core
and that the parallel part of an application scales
linearly with the number of cores. We apply Gustafson's
law to symmetric, asymmetric, and dynamic multicores
and show that this leads to fundamentally different
results than when Amdahl's law is applied. We also
generalize Amdahl's and Gustafson's law and study how
this quantitatively affects the dimensioning of future
multicore systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mueller:2012:ABA,
author = "Conrad Mueller",
title = "Axiom based architecture",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "10--17",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234339",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The paper proposes an axiom based architecture as an
alternative to the von Neumann model. The model has
many desirable properties: fine-grained parallelism,
simple semantics, better security and ease of
programming. The empirical research gives some
indication of its performance potential. A description
is given as to how algebraic arithmetic expressions of
relations can be broken up into primitive expressions
consisting of a single operation. These primitive
relations are shown to be sufficient to describe a
Turing machine. Eight inference rules are given that
define how the primitive relations can be evaluated. An
outline is given of an architecture based on these
inference rules. Finally a brief description is given
of an experimental emulation and empirical evaluation
of the architecture. Instead of manipulating data or
values by applying instructions or functions,
computation is applying existing elements to relations
to create new elements. The element's identifier
determines which relations the element applies to. The
relation determines the identifier of the new element
and the operation that needs to be applied to create
the value of the new element. The conceptually indices
are different in this model. Instead of seeing an index
as an offset into an array, an index is seen as part of
the element identifier. This enables infinitely many
relations to be defined between unique sets using
universal quantifiers. Thus every element, or value,
computed has a unique description.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:2012:RPR,
author = "Alexander Thomasian",
title = "Rebuild processing in {RAID5} with emphasis on the
supplementary parity augmentation method",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "18--27",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234340",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The rotated parity RAID5 disk array tolerates single
disk failures by continuing operation by on-demand
reconstruction of data blocks of the failed disk, until
the systematic reconstruction of the contents of the
failed disk is completed by the rebuild process on a
spare disk. Supplementary Parity Augmentation (SPA),
unlike the pyramid code, which has two parities
covering half of the arrays disks each, extends RAID5's
P parity with an additional S parity, which covers half
of the disks. The extra load with respect to RAID5 of
updating the S parity by one half of the disks is
compensated by the more efficient on demand
reconstruction and rebuild processing when a disk
fails. Although SPA has the same disk space redundancy
level as RAID6, unlike RAID6 it can only deal with
roughly half of all possible double disk failure cases
for eight disks. For rebuild processing SPA reads half
of the disks required by RAID5 and this leads to a
higher Mean Time to Data Loss than RAID5, since fewer
Latent Sector Errors are encountered. We review
performance and reliability modeling of RAID5 arrays to
provide insights into SPA's performance and
reliability, which cannot be gained from numerical
results alone. SPA is outperformed by the Intra-Disk
Redundancy schemes combined with RAID5, which results
in RAID6's reliability and RAID5 performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Giri:2012:FIN,
author = "Nishant Kumar Giri and Amitabha Sinha",
title = "{FPGA} implementation of a novel architecture for
performance enhancement of Radix-2 {FFT}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "28--32",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234341",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a novel architecture for the
enhancement of performance of compute intensive Fast
Fourier Transform (FFT) algorithm which is common in
many signal processing applications. The proposed
architecture exhibits faster response time compared to
radix-2 `Single-path Delay Feedback (SDF)' architecture
and `radix-2 Multi-path Delay Commutator (MDC)'
architecture. The architecture was simulated using
Modelsim and was implemented on Xilinx Virtex 4 FPGA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ghosh:2012:NAF,
author = "Aniruddha Ghosh and Satrughna Singha and Amitabha
Sinha",
title = "A new architecture for {FPGA} implementation of a
{MAC} unit for digital signal processors using mixed
number system",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "33--38",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234342",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Execution of arithmetic operations at very high speed
in real time is the major concern in digital signal
processing (DSP) because DSP algorithms are computation
intensive. In recent times, Residue Number Systems
(RNS) are considered as alternative to binary number
system because of their capabilities of performing
``carry-free'' addition and Multiplication. Double Base
Number Systems (DBNS), another non-binary number
systems are also increasingly becoming attractive for
signal processing applications due to their
capabilities of handling arithmetic operations,
particularly multiplication efficiently. However, the
complexity involved in converting binary to DBNS
becomes a major bottleneck and the efficiency of
performance decreases considerably due to large
conversion time. So RNS Adder and DBNS Multiplier can
be used to implement multiply \& accumulate (MAC)
units. Because RNS adders are less complex and faster
compared to DBNS and DBNS multipliers are efficient
compared to RNS multiplier. MAC units are the key units
in Digital Signal Processors. In this paper we have
shown how FIR filter can be implemented using the
proposed ``Mixed Number System MAC units''.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ghosh:2012:FPR,
author = "Aniruddha Ghosh and Satrughna Singha and Amitabha
Sinha",
title = "{``Floating point RNS''}: a new concept for designing
the {MAC} unit of digital signal processor",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "39--43",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234343",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Execution of arithmetic operations at a very high
speed in real time is the major concern in compute
intensive digital signal processing (DSP) algorithms.
Residue Number Systems are being considered as
alternative to binary number system because of their
capabilities of performing ``carry free'' arithmetic
operations. However, RNS systems have so far been used
to handle integer numbers only. Floating Point RNS
arithmetic units have obvious advantages over fixed
point multiply \& accumulate (MAC) units which are
the key units in Digital Signal Processors. Keeping
this in view, in this paper, the architecture of a
floating point MAC unit is presented.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2012:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "2",
pages = "44--49",
month = may,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2234336.2234345",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 1 17:06:51 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:2012:RRA,
author = "Jamie Liu and Ben Jaiyen and Richard Veras and Onur
Mutlu",
title = "{RAIDR}: {Retention-Aware Intelligent DRAM Refresh}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "1--12",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337161",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Dynamic random-access memory (DRAM) is the building
block of modern main memory systems. DRAM cells must be
periodically refreshed to prevent loss of data. These
refresh operations waste energy and degrade system
performance by interfering with memory accesses. The
negative effects of DRAM refresh increase as DRAM
device capacity increases. Existing DRAM devices
refresh all cells at a rate determined by the leakiest
cell in the device. However, most DRAM cells can retain
data for significantly longer. Therefore, many of these
refreshes are unnecessary. In this paper, we propose
RAIDR (Retention-Aware Intelligent DRAM Refresh), a
low-cost mechanism that can identify and skip
unnecessary refreshes using knowledge of cell retention
times. Our key idea is to group DRAM rows into
retention time bins and apply a different refresh rate
to each bin. As a result, rows containing leaky cells
are refreshed as frequently as normal, while most rows
are refreshed less frequently. RAIDR uses Bloom filters
to efficiently implement retention time bins. RAIDR
requires no modification to DRAM and minimal
modification to the memory controller. In an 8-core
system with 32 GB DRAM, RAIDR achieves a 74.6\% refresh
reduction, an average DRAM power reduction of 16.1\%,
and an average system performance improvement of 8.6\%
over existing systems, at a modest storage overhead of
1.25 KB in the memory controller. RAIDR's benefits are
robust to variation in DRAM system configuration, and
increase as memory capacity increases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bojnordi:2012:PPM,
author = "Mahdi Nazm Bojnordi and Engin Ipek",
title = "{PARDIS}: a programmable memory controller for the
{DDRx} interfacing standards",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "13--24",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337162",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Modern memory controllers employ sophisticated address
mapping, command scheduling, and power management
optimizations to alleviate the adverse effects of DRAM
timing and resource constraints on system performance.
A promising way of improving the versatility and
efficiency of these controllers is to make them
programmable---a proven technique that has seen wide
use in other control tasks ranging from DMA scheduling
to NAND Flash and directory control. Unfortunately, the
stringent latency and throughput requirements of modern
DDRx devices have rendered such programmability largely
impractical, confining DDRx controllers to
fixed-function hardware. This paper presents the
instruction set architecture (ISA) and hardware
implementation of PARDIS, a programmable memory
controller that can meet the performance requirements
of a high-speed DDRx interface. The proposed controller
is evaluated by mapping previously proposed DRAM
scheduling, address mapping, refresh scheduling, and
power management algorithms onto PARDIS. Simulation
results show that the average performance of PARDIS
comes within 8\% of fixed-function hardware for each of
these techniques; moreover, by enabling
application-specific optimizations, PARDIS improves
system performance by 6--17\% and reduces DRAM energy
by 9--22\% over four existing memory controllers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yoon:2012:BEM,
author = "Doe Hyun Yoon and Jichuan Chang and Naveen
Muralimanohar and Parthasarathy Ranganathan",
title = "{BOOM}: enabling mobile memory based low-power server
{DIMMs}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "25--36",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337163",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "To address the real-time processing needs of large and
growing amounts of data, modern software increasingly
uses main memory as the primary data store for critical
information. This trend creates a new emphasis on
high-capacity, high-bandwidth, and high-reliability
main memory systems. Conventional and recently-proposed
server memory techniques can satisfy these
requirements, but at the cost of significantly
increased memory power, a key constraint for future
memory systems. In this paper, we exploit the low-power
nature of another high volume memory component---mobile
DRAM---while improving its bandwidth and reliability
shortcomings with a new DIMM architecture. We propose
Buffered Output On Module (BOOM) that buffers the data
outputs from multiple ranks of low-frequency mobile
DRAM devices, which in aggregation provide high
bandwidth and achieve chipkill-correct or even stronger
reliability. Our evaluation shows that BOOM can reduce
main memory power by more than 73\% relative to the
baseline chipkill system, while improving average
performance by 5\% and providing strong reliability.
For memory-intensive applications, BOOM can improve
performance by 30--40\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malladi:2012:TEP,
author = "Krishna T. Malladi and Benjamin C. Lee and Frank A.
Nothaft and Christos Kozyrakis and Karthika
Periyathambi and Mark Horowitz",
title = "Towards energy-proportional datacenter memory with
mobile {DRAM}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "37--48",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337164",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "To increase datacenter energy efficiency, we need
memory systems that keep pace with processor efficiency
gains. Currently, servers use DDR3 memory, which is
designed for high bandwidth but not for energy
proportionality. A system using 20\% of the peak DDR3
bandwidth consumes 2.3x the energy per bit compared to
the energy consumed by a system with fully utilized
memory bandwidth. Nevertheless, many datacenter
applications stress memory capacity and latency but not
memory bandwidth. In response, we architect server
memory systems using mobile DRAM devices, trading peak
bandwidth for lower energy consumption per bit and more
efficient idle modes. We demonstrate 3--5x lower memory
power, better proportionality, and negligible
performance penalties for datacenter workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Brunie:2012:SBW,
author = "Nicolas Brunie and Sylvain Collange and Gregory
Diamos",
title = "Simultaneous branch and warp interweaving for
sustained {GPU} performance",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "49--60",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337166",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Single-Instruction Multiple-Thread (SIMT)
micro-architectures implemented in Graphics Processing
Units (GPUs) run fine-grained threads in lockstep by
grouping them into units, referred to as warps, to
amortize the cost of instruction fetch, decode and
control logic over multiple execution units. As
individual threads take divergent execution paths,
their processing takes place sequentially, defeating
part of the efficiency advantage of SIMD execution. We
present two complementary techniques that mitigate the
impact of thread divergence on SIMT
micro-architectures. Both techniques relax the SIMD
execution model by allowing two distinct instructions
to be scheduled to disjoint subsets of the same row
of execution units, instead of one single instruction.
They increase flexibility by providing more thread
grouping opportunities than SIMD, while preserving the
affinity between threads to avoid introducing extra
memory divergence. We consider (1) co-issuing
instructions from different divergent paths of the same
warp and (2) co-issuing instructions from different
warps. To support (1), we introduce a novel thread
reconvergence technique that ensures threads are run
back in lockstep at control-flow reconvergence points
without hindering their ability to run branches in
parallel. We propose a lane shuffling technique to
allow solution (2) to benefit from inter-warp
correlations in divergence patterns. The combination of
all these techniques improves performance by 23\% on a
set of regular GPGPU applications and by 40\% on
irregular applications, while maintaining the same
instruction-fetch and processing-unit resource
requirements as the contemporary Fermi GPU
architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rhu:2012:CPC,
author = "Minsoo Rhu and Mattan Erez",
title = "{CAPRI}: prediction of compaction-adequacy for
handling control-divergence in {GPGPU} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "61--71",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337167",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Wide SIMD-based GPUs have evolved into a promising
platform for running general purpose workloads. Current
programmable GPUs allow even code with irregular
control to execute well on their SIMD pipelines. To do
this, each SIMD lane is considered to execute a logical
thread where hardware ensures that control flow is
accurate by automatically applying masked execution.
The masked execution, however, often degrades
performance because the issue slots of masked lanes are
wasted. This degradation can be mitigated by
dynamically compacting multiple unmasked threads into a
single SIMD unit. This paper proposes a fundamentally
new approach to branch compaction that avoids the
unnecessary synchronization required by previous
techniques and that only stalls threads that are likely
to benefit from compaction. Our technique is based on
the compaction-adequacy predictor (CAPRI). CAPRI
dynamically identifies the compaction-effectiveness of
a branch and only stalls threads that are predicted to
benefit from compaction. We utilize a simple
single-level branch-predictor inspired structure and
show that this simple configuration attains a
prediction accuracy of 99.8\% and 86.6\% for
non-divergent and divergent workloads, respectively.
Our performance evaluation demonstrates that CAPRI
consistently outperforms both the baseline design that
never attempts compaction and prior work that stalls
upon all divergent branches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Menon:2012:IES,
author = "Jaikrishnan Menon and Marc {De Kruijf} and Karthikeyan
Sankaralingam",
title = "{iGPU}: exception support and speculative execution on
{GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "72--83",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337168",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Since the introduction of fully programmable vertex
shader hardware, GPU computing has made tremendous
advances. Exception support and speculative execution
are the next steps to expand the scope and improve the
usability of GPUs. However, traditional mechanisms to
support exceptions and speculative execution are highly
intrusive to GPU hardware design. This paper builds on
two related insights to provide a unified lightweight
mechanism for supporting exceptions and speculation on
GPUs. First, we observe that GPU programs can be broken
into code regions that contain little or no live
register state at their entry point. We then also
recognize that it is simple to generate these regions
in such a way that they are idempotent, allowing their
entry points to function as program recovery points and
enabling support for exception handling, fast context
switches, and speculation, all with very low overhead.
We call the architecture of GPUs executing these
idempotent regions the iGPU architecture. The hardware
extensions required are minimal and the construction of
idempotent code regions is fully transparent under the
typical dynamic compilation framework of GPUs. We
demonstrate how iGPU exception support enables virtual
memory paging with very low overhead (1\% to 4\%), and
how speculation support enables circuit-speculation
techniques that can provide over 25\% reduction in
energy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Arnau:2012:BMG,
author = "Jos{\'e}-Mar{\'\i}a Arnau and Joan-Manuel Parcerisa
and Polychronis Xekalakis",
title = "Boosting mobile {GPU} performance with a decoupled
access\slash execute fragment processor",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "84--93",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337169",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Smartphones represent one of the fastest growing
markets, providing significant hardware/software
improvements every few months. However, supporting
these capabilities reduces the operating time per
battery charge. The CPU/GPU component is only left with
a shrinking fraction of the power budget, since most of
the energy is consumed by the screen and the antenna.
In this paper, we focus on improving the energy
efficiency of the GPU since graphical applications
consist an important part of the existing market.
Moreover, the trend towards better screens will
inevitably lead to a higher demand for improved
graphics rendering. We show that the main bottleneck
for these applications is the texture cache and that
traditional techniques for hiding memory latency
(prefetching, multithreading) do not work well or come
at a high energy cost. We thus propose the migration of
GPU designs towards the decoupled access-execute
concept. Furthermore, we significantly reduce bandwidth
usage in the decoupled architecture by exploiting
inter-core data sharing. Using commercial Android
applications, we show that the end design can achieve
93\% of the performance of a heavily multithreaded GPU
while providing energy savings of 34\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kayaalp:2012:BRL,
author = "Mehmet Kayaalp and Meltem Ozsoy and Nael Abu-Ghazaleh
and Dmitry Ponomarev",
title = "Branch regulation: low-overhead protection from code
reuse attacks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "94--105",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337171",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Code reuse attacks (CRAs) are recent security exploits
that allow attackers to execute arbitrary code on a
compromised machine. CRAs, exemplified by
return-oriented and jump-oriented programming
approaches, reuse fragments of the library code, thus
avoiding the need for explicit injection of attack code
on the stack. Since the executed code is reused
existing code, CRAs bypass current hardware and
software security measures that prevent execution from
data or stack regions of memory. While software-based
full control flow integrity (CFI) checking can protect
against CRAs, it includes significant overhead,
involves non-trivial effort of constructing a control
flow graph, relies on proprietary tools and has
potential vulnerabilities due to the presence of
unintended branch instructions in architectures such as
x86---those branches are not checked by the software
CFI. We propose branch regulation (BR), a lightweight
hardware-supported protection mechanism against the
CRAs that addresses all limitations of software CFI. BR
enforces simple control flow rules in hardware at the
function granularity to disallow arbitrary control flow
transfers from one function into the middle of another
function. This prevents common classes of CRAs without
the complexity and run-time overhead of full CFI
enforcement. BR incurs a slowdown of about 2\% and
increases the code footprint by less than 1\% on the
average for the SPEC 2006 benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Demme:2012:SCV,
author = "John Demme and Robert Martin and Adam Waksman and
Simha Sethumadhavan",
title = "Side-channel vulnerability factor: a metric for
measuring information leakage",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "106--117",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337172",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "There have been many attacks that exploit side-effects
of program execution to expose secret information and
many proposed countermeasures to protect against these
attacks. However there is currently no systematic,
holistic methodology for understanding information
leakage. As a result, it is not well known how design
decisions affect information leakage or the
vulnerability of systems to side-channel attacks. In
this paper, we propose a metric for measuring
information leakage called the Side-channel
Vulnerability Factor (SVF). SVF is based on our
observation that all side-channel attacks ranging from
physical to microarchitectural to software rely on
recognizing leaked execution patterns. SVF quantifies
patterns in attackers' observations and measures their
correlation to the victim's actual execution patterns
and in doing so captures systems' vulnerability to
side-channel attacks. In a detailed case study of
on-chip memory systems, SVF measurements help expose
unexpected vulnerabilities in whole-system designs and
shows how designers can make performance-security
trade-offs. Thus, SVF provides a quantitative approach
to secure computer architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Martin:2012:TRT,
author = "Robert Martin and John Demme and Simha Sethumadhavan",
title = "{TimeWarp}: rethinking timekeeping and performance
monitoring mechanisms to mitigate side-channel
attacks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "118--129",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337173",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Over the past two decades, several microarchitectural
side channels have been exploited to create
sophisticated security attacks. Solutions to this
problem have mainly focused on fixing the source of
leaks either by limiting the flow of information
through the side channel by modifying hardware, or by
refactoring vulnerable software to protect sensitive
data from leaking. These solutions are reactive and not
preventative: while the modifications may protect
against a single attack, they do nothing to prevent
future side channel attacks that exploit other
microarchitectural side channels or exploit the same
side channel in a novel way. In this paper we present a
general mitigation strategy that focuses on the
infrastructure used to measure side channel leaks
rather than the source of leaks, and thus applies to
all known and unknown microarchitectural side channel
leaks. Our approach is to limit the fidelity of fine
grain timekeeping and performance counters, making it
difficult for an attacker to distinguish between
different microarchitectural events, thus thwarting
attacks. We demonstrate the strength of our proposed
security modifications, and validate that our changes
do not break existing software. Our proposed changes
require minor --- or in some cases, no --- hardware
modifications and do not result in any substantial
performance degradation, yet offer the most
comprehensive protection against microarchitectural
side channels to date.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Valamehr:2012:IRM,
author = "Jonathan Valamehr and Melissa Chase and Seny Kamara
and Andrew Putnam and Dan Shumow and Vinod
Vaikuntanathan and Timothy Sherwood",
title = "Inspection resistant memory: architectural support for
security from physical examination",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "130--141",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337174",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "The ability to safely keep a secret in memory is
central to the vast majority of security schemes, but
storing and erasing these secrets is a difficult
problem in the face of an attacker who can obtain
unrestricted physical access to the underlying
hardware. Depending on the memory technology, the very
act of storing a 1 instead of a 0 can have physical
side effects measurable even after the power has been
cut. These effects cannot be hidden easily, and if the
secret stored on chip is of sufficient value, an
attacker may go to extraordinary means to learn even a
few bits of that information. Solving this problem
requires a new class of architectures that measurably
increase the difficulty of physical analysis. In this
paper we take a first step towards this goal by
focusing on one of the backbones of any hardware
system: on-chip memory. We examine the relationship
between security, area, and efficiency in these
architectures, and quantitatively examine the resulting
systems through cryptographic analysis and
microarchitectural impact. In the end, we are able to
find an efficient scheme in which, even if an adversary
is able to inspect the value of a stored bit with a
probabilistic error of only 5\%, our system will be
able to prevent that adversary from learning any
information about the original un-coded bits with
99.9999999999\% probability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Xu:2012:TPV,
author = "Yi Xu and Jun Yang and Rami Melhem",
title = "Tolerating process variations in nanophotonic on-chip
networks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "142--152",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337176",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Nanophotonic networks, a potential candidate for
future networks on-chip, have been challenged for their
reliability due to several device-level limitations.
One of the main issues is that fabrication errors
(a.k.a. process variations) can cause devices to
malfunction, rendering communication unreliable. For
example, microring resonator, a preferred optical
modulator device, may not resonate at the designated
wavelength under process variations (PV), leading to
communication errors and bandwidth loss. This paper
proposes a series of solutions to the wavelength
drifting problem of microrings and subsequent bandwidth
loss problem of an optical network, due to PV. The
objective is to maximize network bandwidth through
proper arrangement among microrings and wavelengths
with minimum power requirement. Our arrangement, called
``MinTrim'', solves this problem using simple integer
linear programming, adding supplementary microrings and
allowing flexible assignment of wavelengths to network
nodes as long as the resulting network presents maximal
bandwidth. Each step is shown to improve bandwidth
provisioning with lower power requirement. Evaluations
on a sample network show that a baseline network could
lose more than 40\% bandwidth due to PV. Such loss can
be recovered by MinTrim to produce a network with
98.4\% working bandwidth. In addition, the power
required in arranging microrings is 39\% lower than the
baseline. Therefore, MinTrim provides an efficient
PV-tolerant solution to improving the reliability of
on-chip photonics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Koka:2012:MAA,
author = "Pranay Koka and Michael O. McCracken and Herb
Schwetman and Chia-Hsin Owen Chen and Xuezhe Zheng and
Ron Ho and Kannan Raj and Ashok V. Krishnamoorthy",
title = "A micro-architectural analysis of switched photonic
multi-chip interconnects",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "153--164",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337177",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Silicon photonics is a promising technology to scale
offchip bandwidth in a power-efficient manner. Given
equivalent bandwidth, the flexibility of switched
networks often leads to the assumption that they
deliver greater performance than point-to-point
networks on message passing applications with low-radix
traffic patterns. However, when optical losses are
considered and total optical power is constrained, this
assumption no longer holds. In this paper we present a
power constrained method for designing photonic
interconnects that uses the power characteristics and
limits of optical switches, waveguide crossings,
inter-layer couplers and waveguides. We apply this
method to design three switched network topologies for
a multi-chip system. Using synthetic and HPC
benchmark-derived message patterns, we simulated the
three switched networks and a WDM point-to-point
network. We show that switched networks outperform
point-to-point networks only when the optical losses of
switches and inter-layer couplers losses are each 0.75
dB or lower; achieving this would require a major
breakthrough in device development. We then show that
this result extends to any switched network with
similarly complex topology, through simulations of an
idealized ``perfect'' network that supports 90\% of the
peak bandwidth under all traffic patterns. We conclude
that given a fixed amount of input optical power, under
realistic device assumptions, a point-to-point network
has the best performance and energy characteristics.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Carpenter:2012:EET,
author = "Aaron Carpenter and Jianyun Hu and Ovunc Kocabas and
Michael Huang and Hui Wu",
title = "Enhancing effective throughput for transmission
line-based bus",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "165--176",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337178",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Main-stream general-purpose microprocessors require a
collection of high-performance interconnects to supply
the necessary data movement. The trend of continued
increase in core count has prompted designs of
packet-switched network as a scalable solution for
future-generation chips. However, the cost of
scalability can be significant and especially hard to
justify for smaller-scale chips. In contrast, a
circuit-switched bus using transmission lines and
corresponding circuits offers lower latencies and much
lower energy costs for smaller-scale chips, making it a
better choice than a full-blown network-on-chip (NoC)
architecture. However, shared-medium designs are
perceived as only a niche solution for small- to
medium-scale chips. In this paper, we show that there
are many low-cost mechanisms to enhance the effective
throughput of a bus architecture. When a handful of
highly cost-effective techniques are applied, the
performance advantage of even the most idealistically
configured NoCs becomes vanishingly small. We find
transmission line-based buses to be a more compelling
interconnect even for large-scale chip-multiprocessors,
and thus bring into doubt the centrality of packet
switching in future on-chip interconnect.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Koibuchi:2012:CRS,
author = "Michihiro Koibuchi and Hiroki Matsutani and Hideharu
Amano and D. Frank Hsu and Henri Casanova",
title = "A case for random shortcut topologies for {HPC}
interconnects",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "177--188",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337179",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "As the scales of parallel applications and platforms
increase the negative impact of communication latencies
on performance becomes large. Fortunately, modern High
Performance Computing (HPC) systems can exploit
low-latency topologies of high-radix switches. In this
context, we propose the use of random shortcut
topologies, which are generated by augmenting classical
topologies with random links. Using graph analysis we
find that these topologies, when compared to non-random
topologies of the same degree, lead to drastically
reduced diameter and average shortest path length. The
best results are obtained when adding random links to a
ring topology, meaning that good random shortcut
topologies can easily be generated for arbitrary
numbers of switches. Using flit-level discrete event
simulation we find that random shortcut topologies
achieve throughput comparable to and latency lower than
that of existing non-random topologies such as
hypercubes and tori. Finally, we discuss and quantify
practical challenges for random shortcut topologies,
including routing scalability and larger physical cable
lengths.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nagarakatte:2012:WHS,
author = "Santosh Nagarakatte and Milo M. K. Martin and Steve
Zdancewic",
title = "{Watchdog}: hardware for safe and secure manual memory
management and full memory safety",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "189--200",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337181",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Languages such as C and C++ use unsafe manual memory
management, allowing simple bugs (i.e., accesses to an
object after deallocation) to become the root cause of
exploitable security vulnerabilities. This paper
proposes Watchdog, a hardware-based approach for
ensuring safe and secure manual memory management.
Inspired by prior software-only proposals, Watchdog
generates a unique identifier for each memory
allocation, associates these identifiers with pointers,
and checks to ensure that the identifier is still valid
on every memory access. This use of identifiers and
checks enables Watchdog to detect errors even in the
presence of reallocations. Watchdog stores these
pointer identifiers in a disjoint shadow space to
provide comprehensive protection and ensure
compatibility with existing code. To streamline the
implementation and reduce runtime overhead: Watchdog
(1) uses micro-ops to access metadata and perform
checks, (2) eliminates metadata copies among registers
via modified register renaming, and (3) uses a
dedicated metadata cache to reduce checking overhead.
Furthermore, this paper extends Watchdog's mechanisms
to detect bounds errors, thereby providing full
hardware-enforced memory safety at low overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Devietti:2012:RAS,
author = "Joseph Devietti and Benjamin P. Wood and Karin Strauss
and Luis Ceze and Dan Grossman and Shaz Qadeer",
title = "{RADISH}: always-on sound and complete
{{\underline{Ra}ce \underline{D}etection \underline{i}n
\underline{S}oftware and \underline{H}ardware}}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "201--212",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337182",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Data-race freedom is a valuable safety property for
multithreaded programs that helps with catching bugs,
simplifying memory consistency model semantics, and
verifying and enforcing both atomicity and determinism.
Unfortunately, existing software-only dynamic race
detectors are precise but slow; proposals with hardware
support offer higher performance but are imprecise.
Both precision and performance are necessary to achieve
the many advantages always-on dynamic race detection
could provide. To resolve this trade-off, we propose
Radish, a hybrid hardware-software dynamic race
detector that is always-on and fully precise. In
Radish, hardware caches a principled subset of the
metadata necessary for race detection; this subset
allows the vast majority of race checks to occur
completely in hardware. A flexible software layer
handles persistence of race detection metadata on cache
evictions and occasional queries to this expanded set
of metadata. We show that Radish is correct by proving
equivalence to a conventional happens-before race
detector. Our design has modest hardware complexity:
caches are completely unmodified and we piggy-back on
existing coherence messages but do not otherwise modify
the protocol. Furthermore, Radish can leverage
type-safe languages to reduce overheads substantially.
Our evaluation of a simulated 8-core Radish processor
using PARSEC benchmarks shows runtime overheads from
negligible to 2x, outperforming the leading
software-only race detector by 2x--37x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{VanCraeynest:2012:SHM,
author = "Kenzo {Van Craeynest} and Aamer Jaleel and Lieven
Eeckhout and Paolo Narvaez and Joel Emer",
title = "Scheduling heterogeneous multi-cores through
{Performance Impact Estimation (PIE)}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "213--224",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337184",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Single-ISA heterogeneous multi-core processors are
typically composed of small (e.g., in-order)
power-efficient cores and big (e.g., out-of-order)
high-performance cores. The effectiveness of
heterogeneous multi-cores depends on how well a
scheduler can map workloads onto the most appropriate
core type. In general, small cores can achieve good
performance if the workload inherently has high levels
of ILP. On the other hand, big cores provide good
performance if the workload exhibits high levels of MLP
or requires the ILP to be extracted dynamically. This
paper proposes Performance Impact Estimation (PIE) as a
mechanism to predict which workload-to-core mapping is
likely to provide the best performance. PIE collects
CPI stack, MLP and ILP profile information, and
estimates performance if the workload were to run on a
different core type. Dynamic PIE adjusts the scheduling
at runtime and thereby exploits fine-grained
time-varying execution behavior. We show that PIE
requires limited hardware support and can improve
system performance by an average of 5.5\% over recent
state-of-the-art scheduling proposals and by 8.7\% over
a sampling-based scheduling policy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cao:2012:YYP,
author = "Ting Cao and Stephen M. Blackburn and Tiejun Gao and
Kathryn S. McKinley",
title = "The yin and yang of power and performance for
asymmetric hardware and managed software",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "225--236",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337185",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "On the hardware side, asymmetric multicore processors
present software with the challenge and opportunity of
optimizing in two dimensions: performance and power.
Asymmetric multicore processors (AMP) combine
general-purpose big (fast, high power) cores and small
(slow, low power) cores to meet power constraints.
Realizing their energy efficiency opportunity requires
workloads with differentiated performance and power
characteristics. On the software side, managed
workloads written in languages such as C\#, Java,
JavaScript, and PHP are ubiquitous. Managed languages
abstract over hardware using Virtual Machine (VM)
services (garbage collection, interpretation, and/or
just-in-time compilation) that together impose
substantial energy and performance costs, ranging from
10\% to over 80\%. We show that these services manifest
a differentiated performance and power workload. To
differing degrees, they are parallel, asynchronous,
communicate infrequently, and are not on the
application's critical path. We identify a synergy
between AMP and VM services that we exploit to attack
the 40\% average energy overhead due to VM services.
Using measurements and very conservative models, we
show that adding small cores tailored for VM services
should deliver, at least, improvements in performance
of 13\%, energy of 7\%, and performance per energy of
22\%. The yin of VM services is overhead, but it meets
the yang of small cores on an AMP. The yin of AMP is
exposed hardware complexity, but it meets the yang of
abstraction in managed languages. VM services fulfill
the AMP requirement for an asynchronous, non-critical,
differentiated, parallel, and ubiquitous workload to
deliver energy efficiency. Generalizing this approach
beyond system software to applications will require
substantially more software and hardware investment,
but these results show the potential energy efficiency
gains are significant.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Krimer:2012:LDI,
author = "Evgeni Krimer and Patrick Chiang and Mattan Erez",
title = "Lane decoupling for improving the timing-error
resiliency of wide-{SIMD} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "237--248",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337187",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "A significant portion of the energy dissipated in
modern integrated circuits is consumed by the overhead
associated with timing guardbands that ensure reliable
execution. Timing speculation, where the pipeline
operates at an unsafe voltage with any rare errors
detected and resolved by the architecture, has been
demonstrated to significantly improve the
energy-efficiency of scalar processor designs.
Unfortunately, applying the same timing-speculative
approach to wide-SIMD architectures, such as those used
in highly-efficient GPUs, may not provide similar
gains. In this work, we make two important
contributions. The first is a set of models describing
a parametrized general error probability function that
is based on measurements of a fabricated chip and the
expected efficiency benefits of timing speculation in a
SIMD context. The second contribution is a decoupled
SIMD pipeline that more effectively utilizes timing
speculation and recovery, when compared with a standard
SIMD design that uses only conventional timing
speculation. The proposed lane decoupling enables each
SIMD lane to tolerate timing errors independent of
other adjacent lanes, resulting in higher throughput
and improved scalability. We validate our models and
evaluate our design using a cycle-based GPU simulator,
describe the conditions where efficiency improvements
can be obtained, and explore the benefits of decoupling
across a wide range of parameters. Our results show
that timing speculation can achieve up to 10.3\%
improvement in efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Miller:2012:VCE,
author = "Timothy N. Miller and Renji Thomas and Xiang Pan and
Radu Teodorescu",
title = "{VRSync}: characterizing and eliminating
synchronization-induced voltage emergencies in
many-core processors",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "249--260",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337188",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Power consumption is a primary concern for
microprocessor designers. Lowering the supply voltage
of processors is one of the most effective techniques
for improving their energy efficiency. Unfortunately,
low-voltage operation faces multiple challenges going
forward. One such challenge is increased sensitivity to
voltage fluctuations, which can trigger so-called
``voltage emergencies'' that can lead to errors. These
fluctuations are caused by abrupt changes in power
demand, triggered by processor activity variation as a
function of workload. This paper examines the effects
of voltage fluctuations on future many-core processors.
With the increase in the number of cores in a chip, the
effects of chip-wide activity fluctuation --- such as
that caused by global synchronization in multithreaded
applications --- overshadow the effects of core-level
workload variability. Starting from this observation,
we developed VRSync, a novel synchronization
methodology that uses emergency-aware scheduling
policies that reduce the slope of load fluctuations,
eliminating emergencies. We show that VRSync is very
effective at eliminating emergencies, allowing voltage
guardbands to be significantly lowered, which reduces
energy consumption by an average of 33\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Doudalis:2012:EFU,
author = "Ioannis Doudalis and Milos Prvulovic",
title = "{Euripus}: a flexible unified hardware memory
checkpointing accelerator for bidirectional-debugging
and reliability",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "261--272",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337190",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Bidirectional debugging and error recovery have
different goals (programmer productivity and system
reliability, respectively), yet they both require the
ability to roll-back the program or the system to a
past state. This rollback functionality is typically
implemented using checkpoints that can restore the
system/application to a specific point in time. There
are several types of checkpoints, and bidirectional
debugging and error-recovery use them in different
ways. This paper presents Euripus, a flexible
hardware accelerator for memory checkpointing which can
create different combinations of checkpoints needed for
bidirectional debugging, error recovery, or both. In
particular, Euripus is the first hardware technique to
provide consolidation-friendly undo-logs (for
bidirectional debugging), to allow simultaneous
construction of both undo and redo logs, and to support
multi-level checkpointing for the needs of
error-recovery. Euripus incurs low performance
overheads ({$<$5}\% on average), improves roll-back
latency for bidirectional debugging by {$>$30}\%, and
supports rapid multi-level error recovery that allows
{$>$95}\% system efficiency even with very high error
rates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nair:2012:FOM,
author = "Arun Arvind Nair and Stijn Eyerman and Lieven Eeckhout
and Lizy Kurian John",
title = "A first-order mechanistic model for architectural
vulnerability factor",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "273--284",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337191",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Soft error reliability has become a first-order design
criterion for modern microprocessors. Architectural
Vulnerability Factor (AVF) modeling is often used to
capture the probability that a radiation-induced fault
in a hardware structure will manifest as an error at
the program output. AVF estimation requires detailed
microarchitectural simulations which are time-consuming
and typically present aggregate metrics. Moreover, it
requires a large number of simulations to derive
insight into the impact of microarchitectural events on
AVF. In this work we present a first-order mechanistic
analytical model for computing AVF by estimating the
occupancy of correct-path state in important
microarchitecture structures through inexpensive
profiling. We show that the model estimates the AVF for
the reorder buffer, issue queue, load and store queue,
and functional units in a 4-wide issue machine with a
mean absolute error of less than 0.07. The model is
constructed from the first principles of out-of-order
processor execution in order to provide novel insight
into the interaction of the workload with the
microarchitecture to determine AVF. We demonstrate that
the model can be used to perform design space
explorations to understand trade-offs between soft
error rate and performance, to study the impact of
scaling of microarchitectural structures on AVF and
performance, and to characterize workloads for AVF.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Udipi:2012:LEL,
author = "Aniruddha N. Udipi and Naveen Muralimanohar and Rajeev
Balasubramonian and Al Davis and Norman P. Jouppi",
title = "{LOT-ECC}: localized and tiered reliability mechanisms
for commodity memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "285--296",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337192",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Memory system reliability is a serious and growing
concern in modern servers. Existing chipkill-level
memory protection mechanisms suffer from several
drawbacks. They activate a large number of chips on
every memory access --- this increases energy
consumption, and reduces performance due to the
reduction in rank-level parallelism. Additionally, they
increase access granularity, resulting in wasted
bandwidth in the absence of sufficient access locality.
They also restrict systems to use narrow-I/O x4
devices, which are known to be less energy-efficient
than the wider x8 DRAM devices. In this paper, we
present LOT-ECC, a localized and multi-tiered
protection scheme that attempts to solve these
problems. We separate error detection and error
correction functionality, and employ simple checksum
and parity codes effectively to provide strong
fault-tolerance, while simultaneously simplifying
implementation. Data and codes are localized to the
same DRAM row to improve access efficiency. We use
system firmware to store correction codes in DRAM data
memory and modify the memory controller to handle data
mapping. We thus build an effective fault-tolerance
mechanism that provides strong reliability guarantees,
activates as few chips as possible (reducing power
consumption by up to 44.8\% and reducing latency by up
to 46.9\%), and reduces circuit complexity, all while
working with commodity DRAMs and operating systems.
Finally, we propose the novel concept of a
heterogeneous DIMM that enables the extension of
LOT-ECC to x16 and wider DRAM parts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Basu:2012:RMR,
author = "Arkaprava Basu and Mark D. Hill and Michael M. Swift",
title = "Reducing memory reference energy with opportunistic
virtual caching",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "297--308",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337194",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Most modern cores perform a highly-associative
translation look aside buffer (TLB) lookup on every
memory access. These designs often hide the TLB lookup
latency by overlapping it with L1 cache access, but
this overlap does not hide the power dissipated by TLB
lookups. It can even exacerbate the power dissipation
by requiring higher associativity L1 cache. With
today's concern for power dissipation, designs could
instead adopt a virtual L1 cache, wherein TLB access
power is dissipated only after L1 cache misses.
Unfortunately, virtual caches have compatibility
issues, such as supporting writeable synonyms and x86's
physical page table walker. This work proposes an
Opportunistic Virtual Cache (OVC) that exposes virtual
caching as a dynamic optimization by allowing some
memory blocks to be cached with virtual addresses and
others with physical addresses. OVC relies on small OS
changes to signal which pages can use virtual caching
(e.g., no writeable synonyms), but defaults to physical
caching for compatibility. We show OVC's promise with
analysis that finds virtual cache problems exist, but
are dynamically rare. We change 240 lines in Linux
2.6.28 to enable OVC. On experiments with Parsec and
commercial workloads, the resulting system saves
94--99\% of TLB lookup energy and nearly 23\% of L1
cache dynamic lookup energy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2012:IWE,
author = "Zhe Wang and Samira M. Khan and Daniel A.
Jim{\'e}nez",
title = "Improving writeback efficiency with decoupled
last-write prediction",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "309--320",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337195",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "In modern DDRx memory systems, memory write requests
compete with read requests for available memory
resources, significantly increasing the average read
request service time. Caches are used to mitigate long
memory read latency that limits system performance.
Dirty blocks in the last-level cache (LLC) that will
not be written again before they are evicted will
eventually be written back to memory. We refer to these
blocks as last-write blocks. In this paper, we propose
an LLC writeback technique that improves DRAM
efficiency by scheduling predicted last-write blocks
early. We propose a low overhead last-write predictor
for the LLC. The predicted last-write blocks are made
available to the memory controller for scheduling. This
technique effectively re-distributes the memory
requests and expands writes scheduling opportunities,
allowing writes to be serviced efficiently by DRAM. The
technique is flexible enough to be applied to any LLC
replacement policy. Our evaluation with
multi-programmed workloads shows that the technique
significantly improves performance by 6.5\%--11.4\% on
average over the traditional writeback technique in an
eight-core processor with various DRAM configurations
running memory intensive benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sim:2012:FBC,
author = "Jaewoong Sim and Jaekyu Lee and Moinuddin K. Qureshi
and Hyesoon Kim",
title = "{FLEXclusion}: balancing cache capacity and on-chip
bandwidth via flexible exclusion",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "321--332",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337196",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Exclusive last-level caches (LLCs) reduce memory
accesses by effectively utilizing cache capacity.
However, they require excessive on-chip bandwidth to
support frequent insertions of cache lines on eviction
from upper-level caches. Non-inclusive caches, on the
other hand, have the advantage of using the on-chip
bandwidth more effectively but suffer from a higher
miss rate. Traditionally, the decision to use the cache
as exclusive or non-inclusive is made at design time.
However, the best option for a cache organization
depends on application characteristics, such as working
set size and the amount of traffic consumed by LLC
insertions. This paper proposes FLEXclusion, a design
that dynamically selects between exclusion and
non-inclusion depending on workload behavior. With
FLEXclusion, the cache behaves like an exclusive cache
when the application benefits from extra cache
capacity, and it acts as a non-inclusive cache when
additional cache capacity is not useful, so that it can
reduce on-chip bandwidth. FLEXclusion leverages the
observation that both non-inclusion and exclusion rely
on similar hardware support, so our proposal can be
implemented with negligible hardware changes. Our
evaluations show that a FLEXclusive cache reduces the
on-chip LLC insertion traffic by 72.6\% compared to an
exclusive design and improves performance by 5.9\%
compared to a non-inclusive design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Upasani:2012:SED,
author = "Gaurang Upasani and Xavier Vera and Antonio
Gonz{\'a}lez",
title = "Setting an error detection infrastructure with low
cost acoustic wave detectors",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "333--343",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337198",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "The continuing decrease in dimensions and operating
voltage of transistors has increased their sensitivity
against radiation phenomena making soft errors an
important challenge in future chip multiprocessors
(CMPs). Hence, new techniques for detecting errors in
the logic and memories that allow meeting the desired
failures-in-time (FIT) budget in CMPs are required.
This paper proposes a low-cost dynamic particle strike
detection mechanism through acoustic wave detectors.
Our results show that our mechanism can protect both
the logic and the memory arrays. As a case study, we
also show how this technique can be combined with error
codes to protect the last-level cache at low cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pellegrini:2012:VVP,
author = "Andrea Pellegrini and Joseph L. Greathouse and Valeria
Bertacco",
title = "{Viper}: virtual pipelines for enhanced reliability",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "344--355",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337199",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "The reliability of future processors is threatened by
decreasing transistor robustness. Current architectures
focus on delivering high performance at low cost;
lifetime device reliability is a secondary concern. As
the rate of permanent hardware faults increases,
robustness will become a first class constraint for
even low-cost systems. Current research into reliable
architectures has focused on ad-hoc solutions to
improve designs without altering their centralized
control logic. Unfortunately, this centralized control
presents a single point of failure, which limits
long-term robustness. To address this issue, we
introduce Viper, an architecture built from a redundant
collection of fine-grained hardware components.
Instructions are perceived as customers that require a
sequence of services in order to properly execute. The
hardware components vie to perform what services they
can, dynamically forming virtual pipelines that avoid
defective hardware. This is done using distributed
control logic, which avoids a single point of failure
by construction. Viper can tolerate a high number of
permanent faults due to its inherent redundancy. As
fault counts increase, its performance degrades more
gracefully than traditional centralized-logic
architectures. We estimate that fault rates higher than
one permanent fault per 12 million transistors, on
average, cause the throughput of a classic CMP design
to fall below that of a Viper design of similar size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Temam:2012:DTA,
author = "Olivier Temam",
title = "A defect-tolerant accelerator for emerging
high-performance applications",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "356--367",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337200",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Due to the evolution of technology constraints,
especially energy constraints which may lead to
heterogeneous multi-cores, and the increasing number of
defects, the design of defect-tolerant accelerators for
heterogeneous multi-cores may become a major
micro-architecture research issue. Most custom circuits
are highly defect sensitive, a single transistor can
wreck such circuits. On the contrary, artificial neural
networks (ANNs) are inherently error tolerant
algorithms. And the emergence of high-performance
applications implementing recognition and mining tasks,
for which competitive ANN-based algorithms exist,
drastically expands the potential application scope of
a hardware ANN accelerator. However, while the error
tolerance of ANN algorithms is well documented, there
are few in-depth attempts at demonstrating that an
actual hardware ANN would be tolerant to faulty
transistors. Most fault models are abstract and cannot
demonstrate that the error tolerance of ANN algorithms
can be translated into the defect tolerance of hardware
ANN accelerators. In this article, we introduce a
hardware ANN geared towards defect tolerance and energy
efficiency, by spatially expanding the ANN. In order to
precisely assess the defect tolerance capability of
this hardware ANN, we introduce defects at the level of
transistors, and then assess the impact of such defects
on the hardware ANN functional behavior. We empirically
show that the conceptual error tolerance of neural
networks does translate into the defect tolerance of
hardware neural networks, paving the way for their
introduction in heterogeneous multi-cores as
intrinsically defect-tolerant and energy-efficient
accelerators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2012:CES,
author = "Yoongu Kim and Vivek Seshadri and Donghyuk Lee and
Jamie Liu and Onur Mutlu",
title = "A case for exploiting subarray-level parallelism
{(SALP)} in {DRAM}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "368--379",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337202",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Modern DRAMs have multiple banks to serve multiple
memory requests in parallel. However, when two requests
go to the same bank, they have to be served serially,
exacerbating the high latency of off-chip memory.
Adding more banks to the system to mitigate this
problem incurs high system cost. Our goal in this work
is to achieve the benefits of increasing the number of
banks with a low cost approach. To this end, we propose
three new mechanisms that overlap the latencies of
different requests that go to the same bank. The key
observation exploited by our mechanisms is that a
modern DRAM bank is implemented as a collection of
subarrays that operate largely independently while
sharing few global peripheral structures. Our proposed
mechanisms (SALP-1, SALP-2, and MASA) mitigate the
negative impact of bank serialization by overlapping
different components of the bank access latencies of
multiple requests that go to different subarrays within
the same bank. SALP-1 requires no changes to the
existing DRAM structure and only needs reinterpretation
of some DRAM timing parameters. SALP-2 and MASA require
only modest changes ({$<$} 0.15\% area overhead) to the
DRAM peripheral structures, which are much less design
constrained than the DRAM core. Evaluations show that
all our schemes significantly improve performance for
both single-core systems and multi-core systems. Our
schemes also interact positively with application-aware
memory request scheduling in multi-core systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Qureshi:2012:PIP,
author = "Moinuddin K. Qureshi and Michele M. Franceschini and
Ashish Jagmohan and Luis A. Lastras",
title = "{PreSET}: improving performance of phase change
memories by exploiting asymmetry in write times",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "380--391",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337203",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Phase Change Memory (PCM) is a promising technology
for building future main memory systems. A prominent
characteristic of PCM is that it has write latency much
higher than read latency. Servicing such slow writes
causes significant contention for read requests. For
our baseline PCM system, the slow writes increase the
effective read latency by almost 2X, causing
significant performance degradation. This paper
alleviates the problem of slow writes by exploiting the
fundamental property of PCM devices that writes are
slow only in one direction (SET operation) and are
almost as fast as reads in the other direction (RESET
operation). Therefore, a write operation to a line in
which all memory cells have been SET prior to the
write, will incur much lower latency. We propose
PreSET, an architectural technique that leverages this
property to pro-actively SET all the bits in a given
memory line well in advance of the anticipated write to
that memory line. Our proposed design initiates a
PreSET request for a memory line as soon as that line
becomes dirty in the cache, thereby allowing a large
window of time for the PreSET operation to complete.
Our evaluations show that PreSET is more effective and
incurs lower storage overhead than previously proposed
write cancellation techniques. We also describe static
and dynamic throttling schemes to limit the rate of
PreSET operations. Our proposal reduces effective read
latency from 982 cycles to 594 cycles and increases
system performance by 34\%, while improving the
energy-delay-product by 25\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cooper-Balis:2012:BBM,
author = "Elliott Cooper-Balis and Paul Rosenfeld and Bruce
Jacob",
title = "Buffer-on-board memory systems",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "392--403",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337204",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "The design and implementation of the commodity memory
architecture has resulted in significant performance
and capacity limitations. To circumvent these
limitations, designers and vendors have begun to place
intermediate logic between the CPU and DRAM. This
additional logic has two functions: to control the DRAM
and to communicate with the CPU over a fast and narrow
bus. The benefit provided by this logic is a reduction
in pin-out to the memory system and increased signal
integrity to the DRAM, allowing faster clock rates
while maintaining capacity. While the few vendors
utilizing this design have used the same general
approach, their implementations vary greatly in their
nontrivial details. A hardware-verified simulation
suite is developed to accurately model and evaluate the
behavior of this buffer-on-board memory system. A study
of this design space is used to determine optimal use
of the resources involved. This includes DRAM and bus
organization, queue storage, and mapping schemes.
Various constraints based on implementation costs are
placed on simulated configurations to confirm that
these optimizations apply to viable systems. Finally,
full system simulations are performed to better
understand how this memory system interacts with an
operating system executing an application with the goal
of uncovering behaviors not present in simple limit
case simulations. When applying insights gleaned from
these simulations, optimal performance can be achieved
while still considering outside constraints (i.e.,
pin-out, power, and fabrication costs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jung:2012:PAQ,
author = "Myoungsoo Jung and Ellis H. {Wilson III} and Mahmut
Kandemir",
title = "{Physically Addressed Queueing (PAQ)}: improving
parallelism in solid state disks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "404--415",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337206",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "NAND flash storage has proven to be a competitive
alternative to traditional disk for its properties of
high random-access speeds, low-power and its presumed
efficacy for random-reads. Ironically, we demonstrate
that when packaged in SSD format, there arise many
barriers to reaching full parallelism in reads,
resulting in random writes out-performing them.
Motivated by this, we propose Physically Addressed
Queueing (PAQ), a request scheduler that avoids resource
contention resultant from shared SSD resources. PAQ
makes the following major contributions: First, it
exposes the physical addresses of requests to the
scheduler. Second, I/O clumping is utilized to select
groups of operations that can be simultaneously
executed without major resource conflict. Third,
inter-request NAND transaction packing empowers
multi-plane-mode operations. We implement PAQ in a
cycle-accurate simulator and demonstrate bandwidth and
IOPS improvements greater than 62\% and latency
decreases as much as 41.6\% for random reads, without
degrading performance of other access types.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ausavarungnirun:2012:SMS,
author = "Rachata Ausavarungnirun and Kevin Kai-Wei Chang and
Lavanya Subramanian and Gabriel H. Loh and Onur Mutlu",
title = "Staged memory scheduling: achieving high performance
and scalability in heterogeneous systems",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "416--427",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337207",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "When multiple processor (CPU) cores and a GPU
integrated together on the same chip share the off-chip
main memory, requests from the GPU can heavily
interfere with requests from the CPU cores, leading to
low system performance and starvation of CPU cores.
Unfortunately, state-of-the-art application-aware
memory scheduling algorithms are ineffective at solving
this problem at low complexity due to the large amount
of GPU traffic. A large and costly request buffer is
needed to provide these algorithms with enough
visibility across the global request stream, requiring
relatively complex hardware implementations. This paper
proposes a fundamentally new approach that decouples
the memory controller's three primary tasks into three
significantly simpler structures that together improve
system performance and fairness, especially in
integrated CPU-GPU systems. Our three-stage memory
controller first groups requests based on row-buffer
locality. This grouping allows the second stage to
focus only on inter-application request scheduling.
These two stages enforce high-level policies regarding
performance and fairness, and therefore the last stage
consists of simple per-bank FIFO queues (no further
command reordering within each bank) and
straightforward logic that deals only with low-level
DRAM commands and timing. We evaluate the design
trade-offs involved in our Staged Memory Scheduler
(SMS) and compare it against three state-of-the-art
memory controller designs. Our evaluations show that
SMS improves CPU performance without degrading GPU
frame rate beyond a generally acceptable level, while
being significantly less complex to implement than
previous application-aware schedulers. Furthermore, SMS
can be configured by the system software to prioritize
the CPU or the GPU at varying levels to address
different performance needs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Manikantan:2012:PSC,
author = "R. Manikantan and Kaushik Rajan and R. Govindarajan",
title = "{Probabilistic Shared Cache Management (PriSM)}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "428--439",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337208",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Effective sharing of the last level cache has a
significant influence on the overall performance of a
multicore system. We observe that existing solutions
control cache occupancy at a coarser granularity, do
not scale well to large core counts and in some cases
lack the flexibility to support a variety of
performance goals. In this paper, we propose
Probabilistic Shared Cache Management (PriSM), a
framework to manage the cache occupancy of different
cores at cache block granularity by controlling their
eviction probabilities. The proposed framework requires
only simple hardware changes to implement, can scale to
larger core count and is flexible enough to support a
variety of performance goals. We demonstrate the
flexibility of PriSM, by computing the eviction
probabilities needed to achieve goals like
hit-maximization, fairness and QOS. PriSM-HitMax
improves performance by 18.7\% over LRU and 11.8\% over
previously proposed schemes in a sixteen core machine.
PriSM-Fairness improves fairness over existing
solutions by 23.3\% along with a performance
improvement of 19.0\%. PriSM-QOS successfully achieves
the desired QOS targets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Satish:2012:CTP,
author = "Nadathur Satish and Changkyu Kim and Jatin Chhugani
and Hideki Saito and Rakesh Krishnaiyer and Mikhail
Smelyanskiy and Milind Girkar and Pradeep Dubey",
title = "Can traditional programming bridge the {Ninja}
performance gap for parallel computing applications?",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "440--451",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337210",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Current processor trends of integrating more cores
with wider SIMD units, along with a deeper and complex
memory hierarchy, have made it increasingly more
challenging to extract performance from applications.
It is believed by some that traditional approaches to
programming do not apply to these modern processors and
hence radical new languages must be discovered. In this
paper, we question this thinking and offer evidence in
support of traditional programming methods and the
performance-vs-programming effort effectiveness of
common multi-core processors and upcoming many-core
architectures in delivering significant speedup, and
close-to-optimal performance for commonly used parallel
computing workloads. We first quantify the extent of
the ``Ninja gap'', which is the performance gap between
naively written C/C++ code that is parallelism unaware
(often serial) and best-optimized code on modern
multi-/many-core processors. Using a set of
representative throughput computing benchmarks, we show
that there is an average Ninja gap of 24X (up to 53X)
for a recent 6-core Intel\reg{} Core\TM{} i7 X980 Westmere
CPU, and that this gap if left unaddressed will
inevitably increase. We show how a set of well-known
algorithmic changes coupled with advancements in modern
compiler technology can bring down the Ninja gap to an
average of just 1.3X. These changes typically require
low programming effort, as compared to the very high
effort in producing Ninja code. We also discuss
hardware support for programmability that can reduce
the impact of these changes and even further increase
programmer productivity. We show equally encouraging
results for the upcoming Intel\reg{} Many Integrated
Core architecture (Intel\reg{} MIC) which has more
cores and wider SIMD. We thus demonstrate that we can
contain the otherwise uncontrolled growth of the Ninja
gap and offer a more stable and predictable performance
growth over future architectures, offering strong
evidence that radical language changes are not
required.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kambadur:2012:HCA,
author = "Melanie Kambadur and Kui Tang and Martha A. Kim",
title = "{Harmony}: collection and analysis of parallel block
vectors",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "452--463",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337211",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Efficient execution of well-parallelized applications
is central to performance in the multicore era. Program
analysis tools support the hardware and software sides
of this effort by exposing relevant features of
multithreaded applications. This paper describes
parallel block vectors, which uncover previously unseen
characteristics of parallel programs. Parallel block
vectors provide block execution profiles per
concurrency phase (e.g., the block execution profile of
all serial regions of a program). This information
provides a direct and fine-grained mapping between an
application's runtime parallel phases and the static
code that makes up those phases. This paper also
demonstrates how to collect parallel block vectors with
minimal application perturbation using Harmony. Harmony
is an instrumentation pass for the LLVM compiler that
introduces just 16--21\% overhead on average across
eight Parsec benchmarks. We apply parallel block
vectors to uncover several novel insights about
parallel applications with direct consequences for
architectural design. First, that the serial and
parallel phases of execution used in Amdahl's Law are
often composed of many of the same basic blocks.
Second, that program features, such as instruction mix,
vary based on the degree of parallelism, with serial
phases in particular displaying different instruction
mixes from the program as a whole. Third, that dynamic
execution frequencies do not necessarily correlate with
a block's parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wentzlaff:2012:CFG,
author = "David Wentzlaff and Christopher J. Jackson and Patrick
Griffin and Anant Agarwal",
title = "Configurable fine-grain protection for multicore
processor virtualization",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "464--475",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337213",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Multicore architectures, with their abundant on-chip
resources, are effectively collections of
systems-on-a-chip. The protection system for these
architectures must support multiple concurrently
executing operating systems (OSes) with different
needs, and manage and protect the hardware's novel
communication mechanisms and hardware features.
Traditional protection systems are insufficient; they
protect supervisor from user code, but typically do not
protect one system from another, and only support fixed
assignment of resources to protection levels. In this
paper, we propose an alternative to traditional
protection systems which we call configurable
fine-grain protection (CFP). CFP enables the dynamic
assignment of in-core resources to protection levels.
We investigate how CFP enables different system
software stacks to utilize the same configurable
protection hardware, and how differing OSes can execute
at the same time on a multicore processor with CFP. As
illustration, we describe an implementation of CFP in a
commercial multicore, the TILE64 processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ahn:2012:RHA,
author = "Jeongseob Ahn and Seongwook Jin and Jaehyuk Huh",
title = "Revisiting hardware-assisted page walks for
virtualized systems",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "476--487",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337214",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Recent improvements in architectural supports for
virtualization have extended traditional hardware page
walkers to traverse nested page tables. However,
current two-dimensional (2D) page walkers have been
designed under the assumption that the usage patterns
of guest and nested page tables are similar. In this
paper, we revisit the architectural supports for nested
page table walks to incorporate the unique
characteristics of memory management by hypervisors.
Unlike page tables in native systems, nested page table
sizes do not impose significant overheads on the
overall memory usage. Based on this observation, we
propose to use flat nested page tables to reduce
unnecessary memory references for nested walks. A
competing mechanism to HW 2D page walkers is shadow
paging, which duplicates guest page tables but provides
direct translations from guest virtual to system
physical addresses. However, shadow paging has been
suffering from the overheads of synchronization between
guest and shadow page tables. The second mechanism we
propose is a speculative shadow paging mechanism,
called speculative inverted shadow paging, which is
backed by non-speculative flat nested page tables. The
speculative mechanism provides a direct translation
with a single memory reference for common cases, and
eliminates the page table synchronization overheads. We
evaluate the proposed schemes with the real Xen
hypervisor running on a full system simulator. The flat
page tables improve a state-of-the-art 2D page walker
with a page walk cache and nested TLB by 7\%. The
speculative shadow paging improves the same 2D page
walker by 14\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kontorinis:2012:MDU,
author = "Vasileios Kontorinis and Liuyi Eric Zhang and Baris
Aksanli and Jack Sampson and Houman Homayoun and Eddie
Pettis and Dean M. Tullsen and Tajana Simunic Rosing",
title = "Managing distributed {UPS} energy for effective power
capping in data centers",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "488--499",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337216",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Power over-subscription can reduce costs for modern
data centers. However, designing the power
infrastructure for a lower operating power point than
the aggregated peak power of all servers requires
dynamic techniques to avoid high peak power costs and,
even worse, tripping circuit breakers. This work
presents an architecture for distributed per-server
UPSs that stores energy during low activity periods and
uses this energy during power spikes. This work
leverages the distributed nature of the UPS batteries
and develops policies that prolong the duration of
their usage. The specific approach shaves 19.4\% of the
peak power for modern servers, at no cost in
performance, allowing the installation of 24\% more
servers within the same power budget. More servers
amortize infrastructure costs better and, hence, reduce
total cost of ownership per server by 6.3\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lotfi-Kamran:2012:SP,
author = "Pejman Lotfi-Kamran and Boris Grot and Michael Ferdman
and Stavros Volos and Onur Kocberber and Javier Picorel
and Almutaz Adileh and Djordje Jevdjic and Sachin
Idgunji and Emre Ozer and Babak Falsafi",
title = "Scale-out processors",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "500--511",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337217",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Scale-out datacenters mandate high per-server
throughput to get the maximum benefit from the large
TCO investment. Emerging applications (e.g., data
serving and web search) that run in these datacenters
operate on vast datasets that are not accommodated by
on-die caches of existing server chips. Large caches
reduce the die area available for cores and lower
performance through long access latency when
instructions are fetched. Performance on scale-out
workloads is maximized through a modestly-sized
last-level cache that captures the instruction
footprint at the lowest possible access latency. In
this work, we introduce a methodology for designing
scalable and efficient scale-out server processors.
Based on a metric of performance-density, we facilitate
the design of optimal multi-core configurations, called
pods. Each pod is a complete server that tightly
couples a number of cores to a small last-level cache
using a fast interconnect. Replicating the pod to fill
the die area yields processors which have optimal
performance density, leading to maximum per-chip
throughput. Moreover, as each pod is a stand-alone
server, scale-out processors avoid the expense of
global (i.e., inter-pod) interconnect and coherence.
These features synergistically maximize throughput,
lower design complexity, and improve technology
scalability. In 20nm technology, scale-out chips
improve throughput by 5x--6.5x over conventional and by
1.6x--1.9x over emerging tiled organizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Li:2012:ICO,
author = "Chao Li and Amer Qouneh and Tao Li",
title = "{iSwitch}: coordinating and optimizing renewable
energy powered server clusters",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "512--523",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337218",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Large-scale computing systems such as data centers are
facing increasing pressure to cap their carbon
footprint. Integrating emerging clean energy solutions
into computer system design therefore gains great
significance in the green computing era. While some
pioneering work on tracking variable power budget show
promising energy efficiency, they are not suitable for
data centers due to lack of performance guarantee when
renewable generation is low and fluctuant. In addition,
our characterization of wind power behavior reveals
that data centers designed to track the intermittent
renewable power incur up to 4X performance loss due to
inefficient and redundant load matching activities. As
a result, mitigating operational overhead while still
maintaining desired energy utilization becomes the most
significant challenge in managing server clusters on
intermittent renewable energy generation. In this paper
we take a first step in digging into the operational
overhead of renewable energy powered data center. We
propose iSwitch, a lightweight server power management
that follows renewable power variation characteristics,
leverages existing system infrastructures, and applies
supply/load cooperative scheme to mitigate the
performance overhead. Comparing with state-of-the-art
renewable energy driven system design, iSwitch could
mitigate average network traffic by 75\%, peak network
traffic by 95\%, and reduce 80\% job waiting time while
still maintaining 96\% renewable energy utilization. We
expect that our work can help computer architects make
informed decisions on sustainable and high-performance
system design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Singh:2012:EES,
author = "Abhayendra Singh and Satish Narayanasamy and Daniel
Marino and Todd Millstein and Madanlal Musuvathi",
title = "End-to-end sequential consistency",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "524--535",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337220",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Sequential consistency (SC) is arguably the most
intuitive behavior for a shared-memory multithreaded
program. It is widely accepted that language-level SC
could significantly improve programmability of a
multiprocessor system. However, efficiently supporting
end-to-end SC remains a challenge as it requires that
both compiler and hardware optimizations preserve SC
semantics. While a recent study has shown that a
compiler can preserve SC semantics for a small
performance cost, an efficient and complexity-effective
SC hardware remains elusive. Past hardware solutions
relied on aggressive speculation techniques, which has
not yet been realized in a practical implementation.
This paper exploits the observation that hardware need
not enforce any memory model constraints on accesses to
thread-local and shared read-only locations. A
processor can easily determine a large fraction of
these safe accesses with assistance from static
compiler analysis and the hardware memory management
unit. We discuss a low-complexity hardware design that
exploits this information to reduce the overhead in
ensuring SC. Our design employs an additional unordered
store buffer for fast-tracking thread-local stores and
allowing later memory accesses to proceed without a
memory ordering related stall. Our experimental study
shows that the cost of guaranteeing end-to-end SC is
only 6.2\% on average when compared to a system with
TSO hardware executing a stock compiler's output.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mars:2012:BDS,
author = "Jason Mars and Naveen Kumar",
title = "{BlockChop}: dynamic squash elimination for hybrid
processor architecture",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "536--547",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337221",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Hybrid processors are HW/SW co-designed processors
that leverage blocked-execution, the execution of
regions of instructions as atomic blocks, to facilitate
aggressive speculative optimization. As we move to a
multicore hybrid design, fine grained conflicts for
shared data can violate the atomicity requirement of
these blocks and lead to expensive squashes and
rollbacks. However, as these atomic regions differ from
those used in checkpointing and transactional memory
systems, the extent of this potentially prohibitive
problem remains unclear, and mechanisms to mitigate
these squashes dynamically may be critical to enable a
highly performant multicore hybrid design. In this
work, we investigate how multithreaded applications,
both benchmark and commercial workloads, are affected
by squashes, and present dynamic mechanisms for
mitigating these squashes in hybrid processors. While
the current wisdom is that there is not a significant
number of squashes for smaller atomic regions, we
observe this is not the case for many multithreaded
workloads. With region sizes of just 200--500
instructions, we observe a performance degradation
ranging from 10\% to more than 50\% for workloads with
a mixture of shared reads and writes. By harnessing the
unique flexibility provided by the software subsystem
of hybrid processor design, we present BlockChop, a
framework for dynamically mitigating squashes on
multicore hybrid processors. We present a range of
squash handling mechanisms leveraging retrials,
interpretation, and retranslation, and find that
BlockChop is quite effective. Over the current response
to exceptions and squashes in a hybrid design, we are
able to improve the performance of benchmark and
commercial workloads by 1.4x and 1.2x on average for
large and small region sizes respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yoon:2012:DGM,
author = "Doe Hyun Yoon and Min Kyu Jeong and Michael Sullivan
and Mattan Erez",
title = "The dynamic granularity memory system",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "3",
pages = "548--559",
month = jun,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2366231.2337222",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 6 10:21:07 MDT 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '12 conference proceedings.",
abstract = "Chip multiprocessors enable continued performance
scaling with increasingly many cores per chip. As the
throughput of computation outpaces available memory
bandwidth, however, the system bottleneck will shift to
main memory. We present a memory system, the dynamic
granularity memory system (DGMS), which avoids
unnecessary data transfers, saves power, and improves
system performance by dynamically changing between fine
and coarse-grained memory accesses. DGMS predicts
memory access granularities dynamically in hardware,
and does not require software or OS support. The
dynamic operation of DGMS gives it superior ease of
implementation and power efficiency relative to prior
multi-granularity memory systems, while maintaining
comparable levels of system performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Aguilera:2012:AEW,
author = "Marcos K. Aguilera and Dahlia Malkhi and Keith
Marzullo and Alessandro Panconesi and Andrzej Pelc and
Roger Wattenhofer",
title = "Announcing the {2012 Edsger W. Dijkstra Prize in
Distributed Computing}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "1--2",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411118",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Maitra:2012:NAC,
author = "Subhashis Maitra and Amitabha Sinha",
title = "A new algorithm for computing triple-base number
system",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "3--9",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411119",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We introduce here a generalized method a new Algorithm
to find Triple-Base number system and Triple-Base chain
and hence in turn Single Digit Triple-Base number
system (SDTBNS). The proposed method is not only
simpler and faster than the Algorithms to find
Double-Base number system or Double-Base chain,
experimentally it also returns a shorter length of
Triple-Base chain which in turn reduces the size of the
look-up-table to find out SDTBNS. The complexity
analysis and experimental results shows the novelty of
the proposed Algorithm. Moreover when the proposed
method is applied to find scalar multiplication in case
of Elliptic Curve Cryptography and coefficient
multiplication in case of designing digital filter, its
efficiency also proves its novelty. Here we have used
third base as $5$ because when it is multiplied by $2$
gives $ 10$ which can be efficiently used for decimal
shifting, i.e. if an integer '$n$' can be represented
in SDTBNS form, then $ n / 10^x $ or $ n \times 10^x $
can also be represented in SDTBNS only by dividing or
multiplying '$n$' by $ 10$.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kumar:2012:NLT,
author = "Shiv Kumar and Seshadri Krishna Murthy and G.
Varaprasad and S. Sivasathya",
title = "Network load and traffic pattern on the capacity of
wireless ad hoc networks",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "10--25",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411120",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper focuses on the capacity of wireless ad hoc
networks and analyzes the effect of key factors viz.
network size, traffic patterns and detailed local radio
interactions on the capacity of such networks. The
capacity is evaluated with several different network
layouts and traffic patterns through simulations. To
demonstrate the impact of these factors, the capacity
evaluation starts with a simple case of a chain of
evenly spaced nodes in a network environment and
progresses to a network with random traffic and
randomly spaced nodes. Initially, capacity of static
nodes is evaluated for various network layouts and
traffic patterns. Since, in most scenarios, nodes do
not travel significant distances during packet
transmissions. As an enhancement, mobility of nodes is
introduced into the network scenario and the
performance is again evaluated. The simulations are
carried out using OPNET modeler and the results
obtained are presented in this report. The results are
analyzed to understand the impact of these factors on
the capacity and consequently suggest measures to
increase the same. This work shows that the achievable
capacity of ad hoc network depends on network size,
traffic pattern and mobility. In a single cell
topology, it is found that there is a 50\% reduction in
network throughput, if the node size increases from 2
to 10 nodes, whereas there is a 74\% reduction in the
throughput for chain topology for the same increase in
node size. In a lattice topology with horizontal
traffic, there is a 46\% reduction in network
throughput when the lattice size increases from $ 4
\times 4 $ to $ 5 \times 5 $. The same percentage of
reduction is observed when both horizontal and network
traffic is introduced. In a random network topology
with random traffic, there is an 80\% reduction in
network throughput when the node size increases from
150 to 750 nodes. However, for the same scenario with
the introduction of mobility to the nodes, a slight
improvement is achieved with an overall 75\% reduction
in network throughput.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Isa:2012:EAS,
author = "M. N. Isa and K. Benkrid and T. Clayton",
title = "Efficient architecture and scheduling technique for
pairwise sequence alignment",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "26--31",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411121",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A novel efficient hardware architecture to optimize
the execution time of dynamic programming-based (DP)
pairwise sequence alignment algorithms in hardware is
proposed. It is realized by introducing an efficient
overlapped scheduling of alignment matrix computation
and substitution coefficients' pre-loading onto
processing elements (PEs) in folded systolic arrays. A
new metric is also proposed as an independent
performance evaluator to compare different core
implementations on different FPGA platforms fairly.
Implementation results show that the new hardware
architecture for sequence alignment achieves a minimum
of 40 percent area normalized speed-up compared to the
state-of-the-art hardware implementation, with the
speed-up growing linearly with the number of folds e.g.
120 percent speed up for 16-fold. Compared to
equivalent software implementations, the novel hardware
architecture achieves a minimum of $ 103 \times $
speed-up, with the speed-up growing linearly with the
number of folds e.g. $ 140 \times $ speed-up for
20-fold.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Oudjida:2012:NHR,
author = "A. K. Oudjida and N. Chaillet and M. L. Berrandjia and
A. Liacha",
title = "A new high radix-$ 2^r $ ($ r \geq 8$) multibit recoding
algorithm for large operand size ({$ N \geq 32$})
multipliers",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "32--43",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411122",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper addresses the problem of multiplication
with large operand sizes ($ N \geq 32$). We propose a
new recursive recoding algorithm that shortens the
critical path of the multiplier and reduces the
hardware complexity of partial-product-generators as
well. The new recoding algorithm provides an optimal
space/time partitioning of the multiplier architecture
for any size $N$ of the operands. As a result, the
critical path is drastically reduced to $ 3 \sqrt[3]{N /
2} - 3 $ with no area overhead in comparison to modified
Booth algorithm that shows a critical path of $ N / 2$
in adder stages. For instance, only $7$ adder stages
are needed for a 64-bit two's complement multiplier.
Confronted to reference algorithms for $ N = 64$,
important gain ratios of $ 1.62$, $ 1.71$, $ 2.64$ are
obtained in terms of multiply-time, energy consumption
per multiply operation, and total gate count,
respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2012:INb,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "4",
pages = "44--48",
month = sep,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2411116.2411124",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 11 08:06:57 MST 2012",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This column consists of selected traffic from the {\tt
comp.arch} newsgroup, a forum for discussion of
computer architecture on the Internet---an
international computer network. As always, the opinions
expressed in this column are the personal views of the
authors, and do not necessarily represent the
institutions to which they are affiliated. Text which
sets the context of a message appears underlined or in
italics; this is usually text the author has quoted
from earlier messages. The code-like expressions below
the authors' names are their addresses on Internet.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Amano:2012:FBC,
author = "Hideharu Amano and Wayne Luk",
title = "{FPGA}-based {Connect6} solver with
hardware-accelerated move refinement",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "4--9",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460218",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Connect6 is a two-player game similar to Go-Moku,
which was introduced in 2003. Since placing two stones
in each turn makes a huge game-tree, we require some
acceleration techniques for a solver based on a typical
approach to search the tree. This paper presents an
FPGA-based Connect6 solver with two-level move
refinement. The solver has the dedicated hardware to
accelerate the move refinement by exploiting various
parallelism with a systolic array, linear arrays, and
multiple score-calculation units. Implementation with a
low-end FPGA demonstrates that the accelerator allows
the two-level move refinement in the FPGA-based solver
running at 90 MHz to be 103695 and 414 times faster
than equivalent software implementation with NIOS II
soft processor on the FPGA and Intel Core i7 processor
operating at 2.93 GHz, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chau:2012:RRP,
author = "Thomas C. P. Chau and Wayne Luk and Peter Y. K.
Cheung",
title = "{Roberts}: reconfigurable platform for benchmarking
real-time systems",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "10--15",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460219",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This paper presents Roberts, a Reconfigurable platfOrm
for BEnchmarking Real-Time Systems. Roberts is the
first platform which can be customised for a given
system-under-test to support benchmarking of real-time
properties and energy consumption. The benchmarking
takes into account system workload and environmental
events, with facilities for generating test vectors
conforming to the specification of system under test,
and with support for on-line monitoring of the response
time, output values and energy consumption. The
proposed benchmarking platform has been implemented in
the DE4 development system to provide cycle-accurate
timing measurement at nano-second precision to analyse
high performance applications. An evaluation of our
approach shows that the platform can be used in
analysing the performance of target applications and
overheads of other timing facilities, such as the
interval timer on processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kinoshita:2012:ARS,
author = "Kei Kinoshita and Daisuke Takano and Tomoyuki Okamura
and Tetsuhiko Yao and Yoshiki Yamaguchi",
title = "An augmented reality system with a coarse-grained
reconfigurable device",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "16--21",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460220",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Image recognition and motion tracking are widely
utilized in the field of Augmented Reality (AR).
Although their computational cost is huge, they enable
to extend the practicality and the range of
applications if all computation is processed within
real time. Toward this goal, in this paper, we propose
a handheld AR system optimized for direct hardware
computation. It includes a subspace method for image
recognition and a KLT tracking algorithm for motion
tracking. The AR system is composed of one
two-million-pixel-CCD-image sensor, one head-mounted
display, one reconfigurable device called DAPDNA-2, and
so on. DAPDNA-2 is a coarse-grained and
dynamic-reconfigurable device which is produced by
Tokyo Keiki Inc. The merit of DAPDNA-2 is its
short-reconfiguration time and it is utilised to full
for not only high performance but also the reduction of
power consumption. The experimental result through a
real Japanese-English translation system shows image
recognition and motion tracking are computed within
real-time; the computation time is less than 0.741
milliseconds per a VGA-resolution (640 x 480 pixels)
frame. Thus, we are able to find a highly efficient
computation using a coarse-grained architecture
compared with general-purpose processors and embedded
processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ng:2012:STT,
author = "Nicholas Ng and Nobuko Yoshida and Xin Yu Niu and Kuen
Hung Tsoi",
title = "Session types: towards safe and fast reconfigurable
programming",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "22--27",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460221",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This paper introduces a new programming framework
based on the theory of session types for safe,
reconfigurable parallel designs. We apply the session
type theory to C and Java programming languages and
demonstrate that the session-based languages can offer
a clear and tractable framework to describe
communications between parallel components and
guarantee communication-safety and deadlock-freedom by
compile-time type checking. Many representative
communication topologies such as a ring or
scatter-gather can be programmed and verified in
session-based programming languages. Case studies
involving N-body simulation and Kmeans clustering are
used to illustrate the session-based programming style
and to demonstrate that the session-based languages
perform competitively against MPI counterparts in an
FPGA-based heterogeneous cluster, as well as the
potential of integrating them with FPGA acceleration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Syed:2012:LOA,
author = "Rizwan Syed and Yajun Ha and Bharadwaj Veeravalli",
title = "A low overhead abstract architecture for {FPGA}
resource management",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "28--33",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460222",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "To support dynamic applications, FPGAs will need to
have a software operating system equivalent resource
manager. An abstract FPGA architecture is the
foundation to develop such an FPGA resource manager.
Previous research projects work on the FPGA abstraction
by abstracting the computing and/or the communication
resources. However, various constraints made their
proposals practically less useful due to the
performance and/or the area overheads. We develop a low
overhead abstract FPGA architecture that has the
important features such as dynamically sized
reconfigurable regions, deterministic communications
among regions, clock network management and in-circuit
debugging for regions. The architecture is demonstrated
by implementing three applications on the Xilinx Virtex
5 FPGAs. We evaluate our work by comparing the area and
performance overheads due to the abstractions between
the abstracted and the non-abstracted applications.
Experimental results show that additional resources
required due to abstractions are found to be 6.4\% on
average. This is achieved with low overheads on the
timing performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tsoi:2012:MRS,
author = "Kuen Hung Tsoi and Tobias Becker and Wayne Luk",
title = "Modelling reconfigurable systems in event driven
simulation",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "34--39",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460223",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Reconfigurable platforms allow hardware developers to
customise their designs for specific applications.
However, their adoption involves challenges in
understanding and estimating the impact of various
design parameters and approaches. This paper proposes a
unified framework to model behaviour of reconfigurable
systems using an event driven simulation approach. This
provides an abstract yet informative method to capture
both analytical relationships and empirical parameters
of reconfigurable systems. It can be used to help
making design decisions or verifying analytical models.
We apply this approach to three models of
reconfigurable applications to estimate the
communication efficiency of networked clusters, and the
performance and energy efficiency of runtime
reconfigurable designs for software-defined radio and
for option pricing in finance. The results show that,
through this simulation framework, we can verify the
accuracy of analytical models and also obtain practical
information that is not provided by analytical
models.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shun:2012:FAC,
author = "Zheng Zhi Shun and Tsutomu Maruyama",
title = "{FPGA} acceleration of {CDO} pricing based on
correlation expansions",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "40--45",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460224",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Because of the significant growth in the financial
market, faster and accurate pricing of widespread
instruments is becoming more important. In this paper,
we describe an FPGA implementation of an analytical
method for collateralized debt obligation (CDO) pricing
in the multifactor Normal Copula model. Our experiments
show that the FPGA system is about 40 times faster than
corresponding software on a single core 3 GHz Intel
Core2 processor.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nakahara:2012:WFF,
author = "Hiroki Nakahara and Hiroyuki Nakanishi and Tsutomu
Sasao",
title = "On a wideband {Fast Fourier Transform} for a radio
telescope",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "46--51",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460225",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "The radio telescope analyzes a radio frequency from
celestial objects by using fast Fourier transform
(FFT). In this application, its bandwidth f is wider
than that of the typical FFT. Since the amount of
hardware for the typical FFT circuit is proportional to
the bandwidth f, a special technique is necessary for
this application. This paper shows a realization of
wideband FFT for the radio telescope on an FPGA. We
show that the memory size for the conventional FFT,
which consists of the twiddle factor memory and the
transpose memory, is too large. We replace the twiddle
factor memory with the pipelined CORDIC. To reduce the
number of transpose memories, we increase the radix of
the FFT from $ 2^2 $ to $ 2^k $, also we use the DDR2SDRAM to
implement the transpose memory. We implement the
$ 2^{30} $-FFT on an Altera's Stratix IV GX530 FPGA. It
performs the $ 2^{30} $-FFT operations in 1.5 seconds.
Compared with the Altera's FFT library, our FFT circuit
realizes $ 2^{14} $ times wider bandwidth on the same FPGA.
Also, compared with Tesla S1070 utilizing four GPUs,
our FFT circuit is faster and dissipates lower power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ling:2012:HPP,
author = "Cheng Ling and Khaled Benkrid and Tsuyoshi Hamada",
title = "High performance phylogenetic analysis on
{CUDA}-compatible {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "52--57",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460226",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "The operation of phylogenetic analysis aims to
investigate the evolution and relationships among
species. It is widely used in the fields of system
biology and comparative genomics. However, phylogenetic
analysis is also a computationally intensive operation
as the number of tree topology grows in a factorial way
with the number of species involved. Therefore, due to
the large number of species in the real world, the
computational burden has largely thwarted phylogenetic
reconstruction. In this paper, we describe the detailed
GPU-based multi-threaded design and implementation of a
Markov Chain Monte Carlo (MCMC) maximum likelihood
algorithm for phylogenetic analysis on a set of aligned
nucleotide sequences. The implementation is based on
the framework of the most widely used phylogenetic
analysis tool, namely MrBayes. The proposed approach
resulted in 6x-8x speed-up on an NVidia Geforce 460 GTX
GPU compared to an optimized GPP-based software
implementation running on a desktop computer with a
single Intel Xeon 2.53 GHz CPU and 6.0 GB RAM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lin:2012:EED,
author = "Colin Yu Lin and Hayden Kwok-Hay So",
title = "Energy-efficient dataflow computations on {FPGAs}
using application-specific coarse-grain architecture
synthesis",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "58--63",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460227",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Compiling high-level user applications to execute on
FPGA-based reconfigurable computers often involve
synthesizing dataflow graphs beyond the capacity of the
available hardware resources. A framework that provides
rapid and energy-efficient compilation of such dataflow
graphs on FPGAs using an array of pre-placed
configurable processing elements is proposed. The
mapping schedule of the compute operations on the CPEs
and the direct network among the CPEs are
co-synthesized on a per-application basis to provide
the targeted power-performance tradeoff. Compared to
the use of a fixed generic topology, the use of an
application-specific topology derived by a genetic
algorithm can achieve up to 28\% improvement in
energy-delay product. As the CPEs are pre-placed,
compiling for a new application involve only the
generation of a new operation schedule, which is stored
in on-chip memory, and the new routes among the CPEs.
With optimization in operation scheduling and mapping
and application-specific interconnect network, the
proposed framework achieved up to 199X better
energy-delay product compared to a traditional FPGA
high-level synthesis tool xPilot. The use of such
framework is anticipated to serve as part of a
high-level application compiler for hybrid CPU-FPGA
computation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Malik:2012:ERA,
author = "Jamshaid Sarwar Malik and Paolo Palazzari and Ahmed
Hemani",
title = "Effort, resources, and abstraction vs performance in
high-level synthesis: finding new answers to an old
question",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "64--69",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460228",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This work provides new perspectives on impact of
design effort, consumed resources and design
abstraction on hardware performance in a high-level
synthesis flow. We have shown that counter to published
literature as well as intuition; more design effort may
not always result in better performance. We developed a
kernel that simulates Brownian motion, and investigated
improvement in hardware performance with design effort
at various abstraction levels. Our results indicate
that a designer should be careful in putting more
effort at a particular abstraction level. In our case,
we achieved best performance/effort ratio at algorithm
level rather than lower abstraction levels. This
strongly suggests that design effort is not always
proportional to corresponding improvement in
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kakimoto:2012:PCG,
author = "Takeshi Kakimoto and Keisuke Dohi and Yuichiro Shibata
and Kiyoshi Oguri",
title = "Performance comparison of {GPU} programming frameworks
with the striped {Smith--Waterman} algorithm",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "70--75",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460229",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This paper evaluates and discusses how different GPU
programming frameworks affect the performance obtained
from GPU acceleration of the striped Smith--Waterman
algorithm used for biological sequence alignment. A
total of 6 GPU implementations of the algorithm on
NVIDIA GT200b and AMD RV870 using the CUDA and the
OpenCL frameworks are compared to analyze cons and pros
of explicit descriptions for architecture specific
hardware mechanisms in the code. The evaluation results
show that the primitive descriptions with the CUDA are
still efficient especially for small size data, while
better instruction scheduling and optimizations are
carried out by the OpenCL compiler. On the other hand,
the combination of OpenCL and RV870 which provides a
relatively simple view of the architecture is efficient
for the large data size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tribino:2012:PPA,
author = "Julien Tribino and Antoine Trouv{\'e} and Hadrien A.
Clarke and Kazuaki J. Murakami",
title = "{PASTIS}: a photonic arbitration with scalable token
injection scheme",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "76--81",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460230",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This paper introduces PASTIS, a novel photonic
arbitration protocol based on a scalable token
injection scheme, and ring-based nanophotonic
technology. It aims at connecting together processors
and memories in many-core computer systems by means of
a ring topology. The main strength of PASTIS lays in
the fact that it uses photonic components exclusively,
that is, routing does not require any electronics. In
this work, we compare it with an hybrid opto-electronic
protocol as presented in a related work. Simulations
show that PASTIS performs better in terms of bandwidth,
latency and energy consumption. Indeed, it is scalable
as it can adapt its bandwidth to the system's workload,
thereby saving energy. Finally, we also study the
opportunity of using reconfigurable rings. We determine
that they almost halve the overall static power
consumption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Watanabe:2012:MCP,
author = "Takahiro Watanabe and Minoru Watanabe",
title = "$ 0.18 \mu $ m {CMOS} process high-sensitivity
optically reconfigurable gate array {VLSI}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "82--86",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460231",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Currently, demand for high-speed dynamic
reconfiguration of a programmable device is increasing
for the purpose of increasing the performance of such
devices. To support the high speed dynamic
reconfiguration, optically reconfigurable gate arrays
(ORGAs) have been developed up to now. An ORGA consists
of a holographic memory, a laser array, and an
optically reconfigurable gate array VLSI. The
holographic memory can store many configuration
contexts. In addition, its large bandwidth optical
connection enables high speed reconfiguration. However,
photodiode sensitivities of conventional ORGAs were not
good. This paper therefore presents a newly fabricated
$ 0.18 \mu $ m CMOS process optically reconfigurable
gate array VLSI chip with highly sensitive
photocircuits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nakaya:2012:NVR,
author = "Shogo Nakaya and Makoto Miyamura and Noboru Sakimura
and Yuichi Nakamura and Tadahiko Sugibayashi",
title = "A non-volatile reconfigurable offloader for wireless
sensor nodes",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "87--92",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460232",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "Energy saving is currently one of the most important
issues in the development of battery-powered wireless
sensor nodes (WSNs). We have developed a non-volatile
reconfigurable offloader for flexible and highly
efficient processing on WSNs that uses NanoBridges
(NBs), which are novel non-volatile and reprogrammable
switching elements. Non-volatility is essential for the
intermittent operation of WSNs due to the requirement
of power-on without loading configuration data. We
implemented a data compression algorithm on the
offloader that reduces energy consumption during data
transmission. Simulation results showed that the energy
consumption on the offloader was $ 11 / 21 $ of that on
an ultra-low power CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2012:INc,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "93--112",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460234",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This column consists of selected traffic from the {\tt
comp.arch} newsgroup, a forum for discussion of computer
architecture on the Internet---an international
computer network. As always, the opinions expressed in
this column are the personal views of the authors, and
do not necessarily represent the institutions to which
they are affiliated. Text which sets the context of a
message appears underlined or in italics; this is
usually text the author has quoted from earlier
messages. The code-like expressions below the authors'
names are their addresses on Internet.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bond:2013:GDG,
author = "Michael Bond",
title = "{GPUDet}: a deterministic {GPU} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "1--12",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451118",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Nondeterminism is a key challenge in developing
multithreaded applications. Even with the same input,
each execution of a multithreaded program may produce a
different output. This behavior complicates debugging
and limits one's ability to test for correctness. This
non-reproducibility situation is aggravated on
massively parallel architectures like graphics
processing units (GPUs) with thousands of concurrent
threads. We believe providing a deterministic
environment to ease debugging and testing of GPU
applications is essential to enable a broader class of
software to use GPUs. Many hardware and software
techniques have been proposed for providing determinism
on general-purpose multi-core processors. However,
these techniques are designed for small numbers of
threads. Scaling them to thousands of threads on a GPU
is a major challenge. This paper proposes a scalable
hardware mechanism, GPUDet, to provide determinism in
GPU architectures. In this paper we characterize the
existing deterministic and nondeterministic aspects of
current GPU execution models, and we use these
observations to inform GPUDet's design. For example,
GPUDet leverages the inherent determinism of the SIMD
hardware in GPUs to provide determinism within a
wavefront at no cost. GPUDet also exploits the Z-Buffer
Unit, an existing GPU hardware unit for graphics
rendering, to allow parallel out-of-order memory writes
to produce a deterministic output. Other optimizations
in GPUDet include deterministic parallel execution of
atomic operations and a workgroup-aware algorithm that
eliminates unnecessary global synchronizations. Our
simulation results indicate that GPUDet incurs only 2X
slowdown on average over a baseline nondeterministic
architecture, with runtime overheads as low as 4\% for
compute-bound applications, despite running GPU kernels
with thousands of threads. We also characterize the
sources of overhead for deterministic execution on GPUs
to provide insights for further optimizations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Sung:2013:DEH,
author = "Hyojin Sung and Rakesh Komuravelli and Sarita V.
Adve",
title = "{DeNovoND}: efficient hardware support for disciplined
non-determinism",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "13--26",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451119",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent work has shown that disciplined shared-memory
programming models that provide
deterministic-by-default semantics can simplify both
parallel software and hardware. Specifically, the
DeNovo hardware system has shown that the software
guarantees of such models (e.g., data-race-freedom and
explicit side-effects) can enable simpler, higher
performance, and more energy-efficient hardware than
the current state-of-the-art for deterministic
programs. Many applications, however, contain
non-deterministic parts; e.g., using lock
synchronization. For commercial hardware to exploit the
benefits of DeNovo, it is therefore necessary to extend
DeNovo to support non-deterministic applications. This
paper proposes DeNovoND, a system that supports
lock-based, disciplined non-determinism, with the
simplicity, performance, and energy benefits of DeNovo.
We use a combination of distributed queue-based locks
and access signatures to implement simple memory
consistency semantics for safe non-determinism, with a
coherence protocol that does not require transient
states, invalidation traffic, or directories, and does
not incur false sharing. The resulting system is
simpler, shows comparable or better execution time, and
has 33\% less network traffic on average (translating
directly into energy savings) relative to a
state-of-the-art invalidation-based protocol for 8
applications designed for lock synchronization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Wester:2013:PDR,
author = "Benjamin Wester and David Devecsery and Peter M. Chen
and Jason Flinn and Satish Narayanasamy",
title = "Parallelizing data race detection",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "27--38",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451120",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Detecting data races in multithreaded programs is a
crucial part of debugging such programs, but
traditional data race detectors are too slow to use
routinely. This paper shows how to speed up race
detection by spreading the work across multiple cores.
Our strategy relies on uniparallelism, which executes
time intervals of a program (called epochs) in
parallel to provide scalability, but executes all
threads from a single epoch on a single core to
eliminate locking overhead. We use several techniques
to make parallelization effective: dividing race
detection into three phases, predicting a subset of the
analysis state, eliminating sequential work via
transitive reduction, and reducing the work needed to
maintain multiple versions of analysis via
factorization. We demonstrate our strategy by
parallelizing a happens-before detector and a
lockset-based detector. We find that uniparallelism can
significantly speed up data race detection. With 4x the
number of cores as the original application, our
strategy speeds up the median execution time by 4.4x
for a happens-before detector and 3.3x for a lockset
race detector. Even on the same number of cores as the
conventional detectors, the ability for uniparallelism
to elide analysis locks allows it to reduce the median
overhead by 13\% for a happens-before detector and 8\%
for a lockset detector.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Lucia:2013:CEF,
author = "Brandon Lucia and Luis Ceze",
title = "Cooperative empirical failure avoidance for
multithreaded programs",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "39--50",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451121",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Concurrency errors in multithreaded programs are
difficult to find and fix. We propose Aviso, a system
for avoiding schedule-dependent failures. Aviso
monitors events during a program's execution and, when
a failure occurs, records a history of events from the
failing execution. It uses this history to generate
schedule constraints that perturb the order of events
in the execution and thereby avoids schedules that lead
to failures in future program executions. Aviso
leverages scenarios where many instances of the same
software run, using a statistical model of program
behavior and experimentation to determine which
constraints most effectively avoid failures. After
implementing Aviso, we showed that it decreased failure
rates for a variety of important desktop, server, and
cloud applications by orders of magnitude, with an
average overhead of less than 20\% and, in some cases,
as low as 5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Goiri:2013:PGM,
author = "{\'I}{\~n}igo Goiri and William Katsak and Kien Le and
Thu D. Nguyen and Ricardo Bianchini",
title = "{Parasol} and {GreenSwitch}: managing datacenters
powered by renewable energy",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "51--64",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451123",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Several companies have recently announced plans to
build ``green'' datacenters, i.e. datacenters partially
or completely powered by renewable energy. These
datacenters will either generate their own renewable
energy or draw it directly from an existing nearby
plant. Besides reducing carbon footprints, renewable
energy can potentially reduce energy costs, reduce peak
power costs, or both. However, certain renewable fuels
are intermittent, which requires approaches for
tackling the energy supply variability. One approach is
to use batteries and/or the electrical grid as a backup
for the renewable energy. It may also be possible to
adapt the workload to match the renewable energy
supply. For highest benefits, green datacenter
operators must intelligently manage their workloads and
the sources of energy at their disposal. In this paper,
we first discuss the tradeoffs involved in building
green datacenters today and in the future. Second, we
present Parasol, a prototype green datacenter that we
have built as a research platform. Parasol comprises a
small container, a set of solar panels, a battery bank,
and a grid-tie. Third, we describe GreenSwitch, our
model-based approach for dynamically scheduling the
workload and selecting the source of energy to use. Our
real experiments with Parasol, GreenSwitch, and
MapReduce workloads demonstrate that intelligent
workload and energy source management can produce
significant cost reductions. Our results also isolate
the cost implications of peak power management, storing
energy on the grid, and the ability to delay the
MapReduce jobs. Finally, our results demonstrate that
careful workload and energy source management can
minimize the negative impact of electrical grid
outages.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Shen:2013:PCF,
author = "Kai Shen and Arrvindh Shriraman and Sandhya Dwarkadas
and Xiao Zhang and Zhuan Chen",
title = "Power containers: an {OS} facility for fine-grained
power and energy management on multicore servers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "65--76",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451124",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Energy efficiency and power capping are critical
concerns in server and cloud computing systems. They
face growing challenges due to dynamic power variations
from new client-directed web applications, as well as
complex behaviors due to multicore resource sharing and
hardware heterogeneity. This paper presents a new
operating system facility called ``power containers''
that accounts for and controls the power and energy
usage of individual fine-grained requests in multicore
servers. This facility relies on three key techniques
--- (1) online model that attributes multicore power
(including shared maintenance power) to concurrently
running tasks, (2) alignment of actual power
measurements and model estimates to enable online model
recalibration, and (3) on-the-fly
application-transparent request tracking in multi-stage
servers to isolate the power and energy contributions
and customize per-request control. Our mechanisms
enable new multicore server management capabilities
including fair power capping that only penalizes
power-hungry requests, and energy-aware request
distribution between heterogeneous servers. Our
evaluation uses three multicore processors (Intel
Woodcrest, Westmere, and SandyBridge) and a variety of
server and cloud computing (Google App Engine)
workloads. Our results demonstrate the high accuracy of
our request power accounting (no more than 11\% errors)
and the effectiveness of container-enabled power virus
isolation and throttling. Our request distribution case
study shows up to 25\% energy saving compared to an
alternative approach that recognizes machine
heterogeneity but not fine-grained workload affinity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Delimitrou:2013:PQA,
author = "Christina Delimitrou and Christos Kozyrakis",
title = "{Paragon}: {QoS}-aware scheduling for heterogeneous
datacenters",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "77--88",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451125",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Large-scale datacenters (DCs) host tens of thousands
of diverse applications each day. However, interference
between colocated workloads and the difficulty to match
applications to one of the many hardware platforms
available can degrade performance, violating the
quality of service (QoS) guarantees that many cloud
workloads require. While previous work has identified
the impact of heterogeneity and interference, existing
solutions are computationally intensive, cannot be
applied online and do not scale beyond few
applications. We present Paragon, an online and
scalable DC scheduler that is heterogeneity and
interference-aware. Paragon is derived from robust
analytical methods and instead of profiling each
application in detail, it leverages information the
system already has about applications it has previously
seen. It uses collaborative filtering techniques to
quickly and accurately classify an unknown, incoming
workload with respect to heterogeneity and interference
in multiple shared resources, by identifying
similarities to previously scheduled applications. The
classification allows Paragon to greedily schedule
applications in a manner that minimizes interference
and maximizes server utilization. Paragon scales to
tens of thousands of servers with marginal scheduling
overheads in terms of time or state. We evaluate
Paragon with a wide range of workload scenarios, on
both small and large-scale systems, including 1,000
servers on EC2. For a 2,500-workload scenario, Paragon
enforces performance guarantees for 91\% of
applications, while significantly improving
utilization. In comparison, heterogeneity-oblivious,
interference-oblivious and least-loaded schedulers only
provide similar guarantees for 14\%, 11\% and 3\% of
workloads. The differences are more striking in
oversubscribed scenarios where resource efficiency is
more critical.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Tang:2013:RRS,
author = "Lingjia Tang and Jason Mars and Wei Wang and Tanima
Dey and Mary Lou Soffa",
title = "{ReQoS}: reactive static\slash dynamic compilation for
{QoS} in warehouse scale computers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "89--100",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451126",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As multicore processors with expanding core counts
continue to dominate the server market, the overall
utilization of the class of datacenters known as
warehouse scale computers (WSCs) depends heavily on
colocation of multiple workloads on each server to take
advantage of the computational power provided by modern
processors. However, many of the applications running
in WSCs, such as websearch, are user-facing and have
quality of service (QoS) requirements. When multiple
applications are co-located on a multicore machine,
contention for shared memory resources threatens
application QoS as severe cross-core performance
interference may occur. WSC operators are left with two
options: either disregard QoS to maximize WSC
utilization, or disallow the co-location of
high-priority user-facing applications with other
applications, resulting in low machine utilization and
millions of dollars wasted. This paper presents ReQoS,
a static/dynamic compilation approach that enables
low-priority applications to adaptively manipulate
their own contentiousness to ensure the QoS of
high-priority co-runners. ReQoS is composed of a
profile guided compilation technique that identifies
and inserts markers in contentious code regions in
low-priority applications, and a lightweight runtime
that monitors the QoS of high-priority applications and
reactively reduces the pressure low-priority
applications generate to the memory subsystem when
cross-core interference is detected. In this work, we
show that ReQoS can accurately diagnose contention and
significantly reduce performance interference to ensure
application QoS. Applying ReQoS to SPEC2006 and
SmashBench workloads on real multicore machines, we are
able to improve machine utilization by more than 70\%
in many cases, and more than 50\% on average, while
enforcing a 90\% QoS threshold. We are also able to
improve the energy efficiency of modern multicore
machines by 47\% on average over a policy of
disallowing co-locations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Arulraj:2013:PRS,
author = "Joy Arulraj and Po-Chun Chang and Guoliang Jin and
Shan Lu",
title = "Production-run software failure diagnosis via hardware
performance counters",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "101--112",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451128",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Sequential and concurrency bugs are widespread in
deployed software. They cause severe failures and huge
financial loss during production runs. Tools that
diagnose production-run failures with low overhead are
needed. The state-of-the-art diagnosis techniques use
software instrumentation to sample program properties
at run time and use off-line statistical analysis to
identify properties most correlated with failures.
Although promising, these techniques suffer from high
run-time overhead, which is sometimes over 100\%, for
concurrency-bug failure diagnosis and hence are not
suitable for production-run usage. We present PBI, a
system that uses existing hardware performance counters
to diagnose production-run failures caused by
sequential and concurrency bugs with low overhead. PBI
is designed based on several key observations. First, a
few widely supported performance counter events can
reflect a wide variety of common software bugs and can
be monitored by hardware with almost no overhead.
Second, the counter overflow interrupt supported by
existing hardware and operating systems provides a
natural and effective mechanism to conduct event
sampling at user level. Third, the noise and
non-determinism in interrupt delivery complements well
with statistical processing. We evaluate PBI using 13
real-world concurrency and sequential bugs from
representative open-source server, client, and utility
programs, and 10 bugs from a widely used
software-testing benchmark. Quantitatively, PBI can
effectively diagnose failures caused by these bugs with
a small overhead that is never higher than 10\%.
Qualitatively, PBI does not require any change to
software and presents a novel use of existing hardware
performance counters.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Zhang:2013:CFC,
author = "Wei Zhang and Marc de Kruijf and Ang Li and Shan Lu
and Karthikeyan Sankaralingam",
title = "{ConAir}: featherweight concurrency bug recovery via
single-threaded idempotent execution",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "113--126",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451129",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many concurrency bugs are hidden in deployed software
and cause severe failures for end-users. When they
finally manifest and become known by developers, they
are difficult to fix correctly. To support end-users,
we need techniques that help software survive hidden
concurrency bugs during production runs. To help
developers, we need techniques that fix exposed
concurrency bugs. The state-of-the-art techniques on
concurrency-bug fixing and survival only satisfy a
subset of four important properties: compatibility,
correctness, generality, and performance. We aim to
develop a system that satisfies all of these four
properties. To achieve this goal, we leverage two
observations: (1) rolling back a single thread is
sufficient to recover from most concurrency-bug
failures; (2) reexecuting an idempotent region, which
requires no memory-state checkpoint, is sufficient to
recover from many concurrency-bug failures. Our system
ConAir includes a static analysis component that
automatically identifies potential failure sites, a
static analysis component that automatically identifies
the idempotent code regions around every failure site,
and a code-transformation component that inserts
rollback-recovery code around the identified idempotent
regions. We evaluated ConAir on 10 real-world
concurrency bugs in widely used C/C++ open-source
applications. These bugs cover different types of
failure symptoms and root causes. Quantitatively,
ConAir helps software survive failures caused by all of
these bugs with negligible run-time overhead ({$<$1}\%)
and short recovery time. Qualitatively, ConAir can help
recover from failures caused by unknown bugs. It
guarantees that program semantics remain unchanged and
requires no change to operating systems or hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Viennot:2013:TMR,
author = "Nicolas Viennot and Siddharth Nair and Jason Nieh",
title = "Transparent mutable replay for multicore debugging and
patch validation",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "127--138",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451130",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present Dora, a mutable record-replay system which
allows a recorded execution of an application to be
replayed with a modified version of the application.
This feature, not available in previous record-replay
systems, enables powerful new functionality. In
particular, Dora can help reproduce, diagnose, and fix
software bugs by replaying a version of a recorded
application that is recompiled with debugging
information, reconfigured to produce verbose log
output, modified to include additional print
statements, or patched to fix a bug. Dora uses
lightweight operating system mechanisms to record an
application execution by capturing nondeterministic
events to a log without imposing unnecessary timing and
ordering constraints. It replays the log using a
modified version of the application even in the
presence of added, deleted, or modified operations that
do not match events in the log. Dora searches for a
replay that minimizes differences between the log and
the replayed execution of the modified program. If
there are no modifications, Dora provides deterministic
replay of the unmodified program. We have implemented a
Linux prototype which provides transparent mutable
replay without recompiling or relinking applications.
We show that Dora is useful for reproducing,
diagnosing, and fixing software bugs in real-world
applications, including Apache and MySQL. Our results
show that Dora (1) captures bugs and replays them with
applications modified or reconfigured to produce
additional debugging output for root cause diagnosis,
(2) captures exploits and replays them with patched
applications to validate that the patches successfully
eliminate vulnerabilities, (3) records production
workloads and replays them with patched applications to
validate patches with realistic workloads, and (4)
maintains low recording overhead on commodity multicore
hardware, making it suitable for production systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Sahoo:2013:ULI,
author = "Swarup Kumar Sahoo and John Criswell and Chase Geigle
and Vikram Adve",
title = "Using likely invariants for automated software fault
localization",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "139--152",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451131",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose an automatic diagnosis technique for
isolating the root cause(s) of software failures. We
use likely program invariants, automatically generated
using correct inputs that are close to the
fault-triggering input, to select a set of candidate
program locations which are possible root causes. We
then trim the set of candidate root causes using
software-implemented dynamic backwards slicing, plus
two new filtering heuristics: dependence filtering, and
filtering via multiple failing inputs that are also
close to the failing input. Experimental results on
reported software bugs of three large open-source
servers show that we are able to narrow down the number
of candidate bug locations to between 5 and 17 program
expressions, even in programs that are hundreds of
thousands of lines long.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Paulos:2013:REA,
author = "Eric Paulos",
title = "The rise of the expert amateur: {DIY} culture and the
evolution of computer science",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "153--154",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451133",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We are at an important technological inflection point.
Most of our computing systems have been designed and
built by professionally trained experts (i.e. us ---
computer scientists, engineers, and designers) for use
in specific domains and to solve explicit problems.
Artifacts often called ``user manuals'' traditionally
prescribed the appropriate usage of these tools and
implied an acceptable etiquette for interaction and
experience. A fringe group of individuals usually
labeled ``hackers'' or ``amateurs'' or ``makers'' have
challenged this producer-consumer model of technology
by creating novel hardware and software features to
``improve'' our research and products while a similar
creative group of technicians called ``artists'' have
redirected the techniques, tools, and tenets of
accepted technological usage away from their typical
manifestations in practicality and product. Over time
the technological artifacts of these fringe groups and
the support for their rhetoric have gained them a
foothold into computing culture and eroded the
established power discontinuities within the practice
of computing research. We now expect our computing
tools to be driven by an architecture of open
participation and democracy that encourages users to
add value to their tools and applications as they use
them. Similarly, the bar for enabling the design of
novel, personal computing systems and ``hardware
remixes'' has fallen to the point where many
non-experts and novices are readily embracing and
creating fascinating and ingenious computing artifacts
outside of our official and traditionally sanctioned
academic and industrial research communities. But how
have we as ``expert'' practitioners been influencing
this discussion? By constructing a practice around the
design and development of technology for task based and
problem solving applications, we have unintentionally
established such work as the status quo for the human
computing experience. We have failed in our duty to
open up alternate forums for technology to express
itself and touch our lives beyond productivity and
efficiency. Blinded by our quest for ``smart
technologies'' we have forgotten to contemplate the
design of technologies to inspire us to be smarter,
more curious, and more inquisitive. We owe it to
ourselves to rethink the impact we desire to have on
this historic moment in computing culture. We must
choose to participate in and perhaps lead a dialogue
that heralds an expansive new acceptable practice of
designing to enable participation by experts and
non-experts alike. We are in the milieu of the rise of
the ``expert amateur''. We must change our mantra ---
not just performance, completeness, and usability but
openness, usefulness and relevancy to our world, its
citizens, and our environment. This talk will explore
elements of the DIY and maker culture and its relevancy
to research questions across computational hardware,
languages, and systems. Ultimately, this talk will
outline and argue for expanding the design territory
and potential opportunities for all of us to
collaborate and benefit as a society from this cultural
movement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Raghavan:2013:CSH,
author = "Arun Raghavan and Laurel Emurian and Lei Shao and
Marios Papaefthymiou and Kevin P. Pipe and Thomas F.
Wenisch and Milo M. K. Martin",
title = "Computational sprinting on a hardware\slash software
testbed",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "155--166",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451135",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "CMOS scaling trends have led to an inflection point
where thermal constraints (especially in mobile devices
that employ only passive cooling) preclude sustained
operation of all transistors on a chip --- a phenomenon
called ``dark silicon.'' Recent research proposed
computational sprinting --- exceeding sustainable
thermal limits for short intervals --- to improve
responsiveness in light of the bursty computation
demands of many media-rich interactive mobile
applications. Computational sprinting improves
responsiveness by activating reserve cores (parallel
sprinting) and/or boosting frequency/voltage (frequency
sprinting) to power levels that far exceed the system's
sustainable cooling capabilities, relying on thermal
capacitance to buffer heat. Prior work analyzed the
feasibility of sprinting through modeling and
simulation. In this work, we investigate sprinting
using a hardware/software testbed. First, we study
unabridged sprints, wherein the computation completes
before temperature becomes critical, demonstrating a
6.3x responsiveness gain, and a 6\% energy efficiency
improvement by racing to idle. We then analyze
truncated sprints, wherein our software runtime system
must intervene to prevent overheating by throttling
parallelism and frequency before the computation is
complete. To avoid oversubscription penalties (context
switching inefficiencies after a truncated parallel
sprint), we develop a sprint-aware task-based parallel
runtime. We find that maximal-intensity sprinting is
not always best, introduce the concept of sprint
pacing, and evaluate an adaptive policy for selecting
sprint intensity. We report initial results using a
phase change heat sink to extend maximum sprint
duration. Finally, we demonstrate that a
sprint-and-rest operating regime can actually
outperform thermally-limited sustained execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Ahn:2013:DAS,
author = "Wonsun Ahn and Yuelu Duan and Josep Torrellas",
title = "{DeAliaser}: alias speculation using atomic region
support",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "167--180",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451136",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Alias analysis is a critical component in many
compiler optimizations. A promising approach to reduce
the complexity of alias analysis is to use speculation.
The approach consists of performing optimizations
assuming the alias relationships that are true most of
the time, and repairing the code when such
relationships are found not to hold through runtime
checks. This paper proposes a general alias speculation
scheme that leverages upcoming hardware support for
transactions with the help of some ISA extensions. The
ability of transactions to checkpoint and roll back
frees the compiler to pursue aggressive optimizations
without having to worry about recovery code. Also,
exposing the memory conflict detection hardware in
transactions to software allows runtime checking of
aliases with little or no overhead. We test the
potential of the novel alias speculation approach with
Loop Invariant Code Motion (LICM), Global Value
Numbering (GVN), and Partial Redundancy Elimination
(PRE) optimization passes. On average, they are shown
to reduce program execution time by 9\% in SPEC FP2006
applications and 3\% in SPEC INT2006 applications over
the alias analysis of a state-of-the-art compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Park:2013:RCH,
author = "Heekwon Park and Seungjae Baek and Jongmoo Choi and
Donghee Lee and Sam H. Noh",
title = "Regularities considered harmful: forcing randomness to
memory accesses to reduce row buffer conflicts for
multi-core, multi-bank systems",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "181--192",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451137",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose a novel kernel-level memory allocator,
called M$^3$ (M-cube, Multi-core Multi-bank Memory
allocator), that has the following two features. First,
it introduces and makes use of a notion of a memory
container, which is defined as a unit of memory that
comprises the minimum number of page frames that can
cover all the banks of the memory organization, by
exclusively assigning a container to a core so that
each core achieves bank parallelism as much as
possible. Second, it orchestrates page frame allocation
so that pages that threads access are dispersed
randomly across multiple banks so that each thread's
access pattern is randomized. The development of M$^3$
is based on a tool that we develop to fully understand
the architectural characteristics of the underlying
memory organization. Using an extension of this tool,
we observe that the same application that accesses
pages in a random manner outperforms one that accesses
pages in a regular pattern such as sequential or same
ordered accesses. This is because such randomized
accesses reduces inter-thread access interference on
the row-buffer in memory banks. We implement M$^3$ in
the Linux kernel version 2.6.32 on the Intel Xeon
system that has 16 cores and 32GB DRAM. Performance
evaluation with various workloads show that M$^3$
improves the overall performance for memory intensive
benchmarks by up to 85\% with an average of about
40\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Honarmand:2013:CUA,
author = "Nima Honarmand and Nathan Dautenhahn and Josep
Torrellas and Samuel T. King and Gilles Pokam and
Cristiano Pereira",
title = "{Cyrus}: unintrusive application-level record-replay
for replay parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "193--206",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451138",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Architectures for deterministic record-replay (R\&R)
of multithreaded code are attractive for program
debugging, intrusion analysis, and fault-tolerance
uses. However, very few of the proposed designs have
focused on maximizing replay speed --- a key enabling
property of these systems. The few efforts that focus
on replay speed require intrusive hardware or software
modifications, or target whole-system R\&R rather
than the more useful application-level R\&R. This
paper presents the first hardware-based scheme for
unintrusive, application-level R\&R that explicitly
targets high replay speed. Our scheme, called Cyrus,
requires no modification to commodity snoopy cache
coherence. It introduces the concept of an on-the-fly
software Backend Pass during recording which, as the
log is being generated, transforms it for high replay
parallelism. This pass also fixes-up the log, and can
flexibly trade-off replay parallelism for log size. We
analyze the performance of Cyrus using full system (OS
plus hardware) simulation. Our results show that Cyrus
has negligible recording overhead. In addition, for
8-processor runs of SPLASH-2, Cyrus attains an average
replay parallelism of 5, and a replay speed that is, on
average, only about 50\% lower than the recording
speed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{deOliveira:2013:WYS,
author = "Augusto Born de Oliveira and Sebastian Fischmeister
and Amer Diwan and Matthias Hauswirth and Peter F.
Sweeney",
title = "Why you should care about quantile regression",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "207--218",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451140",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Research has shown that correctly conducting and
analysing computer performance experiments is
difficult. This paper investigates what is necessary to
conduct successful computer performance evaluation by
attempting to repeat a prior experiment: the comparison
between two Linux schedulers. In our efforts, we found
that exploring an experimental space through a series
of incremental experiments can be inconclusive, and
there may be no indication of how much experimentation
will be enough. Analysis of variance (ANOVA), a
traditional analysis method, is able to partly solve
the problems with the previous approach, but we
demonstrate that ANOVA can be insufficient for proper
analysis due to the requirements it imposes on the
data. Finally, we demonstrate the successful
application of quantile regression, a recent
development in statistics, to computer performance
experiments. Quantile regression can provide more
insight into the experiment than ANOVA, with the
additional benefit of being applicable to data from any
distribution. This property makes it especially useful
in our field, since non-normally distributed data is
common in computer experiments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Curtsinger:2013:SSS,
author = "Charlie Curtsinger and Emery D. Berger",
title = "{STABILIZER}: statistically sound performance
evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "219--228",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451141",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Researchers and software developers require effective
performance evaluation. Researchers must evaluate
optimizations or measure overhead. Software developers
use automatic performance regression tests to discover
when changes improve or degrade performance. The
standard methodology is to compare execution times
before and after applying changes. Unfortunately,
modern architectural features make this approach
unsound. Statistically sound evaluation requires
multiple samples to test whether one can or cannot
(with high confidence) reject the null hypothesis that
results are the same before and after. However, caches
and branch predictors make performance dependent on
machine-specific parameters and the exact layout of
code, stack frames, and heap objects. A single binary
constitutes just one sample from the space of program
layouts, regardless of the number of runs. Since
compiler optimizations and code changes also alter
layout, it is currently impossible to distinguish the
impact of an optimization from that of its layout
effects. This paper presents Stabilizer, a system that
enables the use of the powerful statistical techniques
required for sound performance evaluation on modern
architectures. Stabilizer forces executions to sample
the space of memory configurations by repeatedly
re-randomizing layouts of code, stack, and heap objects
at runtime. Stabilizer thus makes it possible to
control for layout effects. Re-randomization also
ensures that layout effects follow a Gaussian
distribution, enabling the use of statistical tests
like ANOVA. We demonstrate Stabilizer's efficiency
({$<$7}\% median overhead) and its effectiveness by
evaluating the impact of LLVM's optimizations on the
SPEC CPU2006 benchmark suite. We find that, while -O2
has a significant impact relative to -O1, the
performance impact of -O3 over -O2 optimizations is
indistinguishable from random noise.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Gidra:2013:SSS,
author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and
Marc Shapiro",
title = "A study of the scalability of stop-the-world garbage
collectors on multicores",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "229--240",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451142",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Large-scale multicore architectures create new
challenges for garbage collectors (GCs). In particular,
throughput-oriented stop-the-world algorithms
demonstrate good performance with a small number of
cores, but have been shown to degrade badly beyond
approximately 8 cores on a 48-core with OpenJDK 7. This
negative result raises the question whether the
stop-the-world design has intrinsic limitations that
would require a radically different approach. Our study
suggests that the answer is no, and that there is no
compelling scalability reason to discard the existing
highly-optimised throughput-oriented GC code on
contemporary hardware. This paper studies the default
throughput-oriented garbage collector of OpenJDK 7,
called Parallel Scavenge. We identify its bottlenecks,
and show how to eliminate them using well-established
parallel programming techniques. On the SPECjbb2005,
SPECjvm2008 and DaCapo 9.12 benchmarks, the improved GC
matches the performance of Parallel Scavenge at low
core count, but scales well, up to 48~cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{McFarlin:2013:DDO,
author = "Daniel S. McFarlin and Charles Tucker and Craig
Zilles",
title = "Discerning the dominant out-of-order performance
advantage: is it speculation or dynamism?",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "241--252",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451143",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we set out to study the performance
advantages of an Out-of-Order (OOO) processor relative
to in-order processors with similar execution
resources. In particular, we try to tease apart the
performance contributions from two sources: the
improved schedules enabled by OOO hardware speculation
support and its ability to generate different schedules
on different occurrences of the same instructions based
on operand and functional unit availability. We find
that the ability to express good static schedules
achieves the bulk of the speedup resulting from OOO.
Specifically, of the 53\% speedup achieved by OOO
relative to a similarly provisioned in-order machine,
we find that 88\% of that speedup can be achieved by
using a single ``best'' static schedule as suggested by
observing an OOO schedule of the code. We discuss the
ISA mechanisms that would be required to express these
static schedules. Furthermore, we find that the
benefits of dynamism largely come from two kinds of
events that influence the application's critical path:
load instructions that miss in the cache only part of
the time and branch mispredictions. We find that much
of the benefit of OOO dynamism can be achieved by the
potentially simpler task of addressing these two
behaviors directly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Checkoway:2013:IAW,
author = "Stephen Checkoway and Hovav Shacham",
title = "{Iago} attacks: why the system call {API} is a bad
untrusted {RPC} interface",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "253--264",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451145",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In recent years, researchers have proposed systems for
running trusted code on an untrusted operating system.
Protection mechanisms deployed by such systems keep a
malicious kernel from directly manipulating a trusted
application's state. Under such systems, the
application and kernel are, conceptually, peers, and
the system call API defines an RPC interface between
them. We introduce Iago attacks, attacks that a
malicious kernel can mount in this model. We show how a
carefully chosen sequence of integer return values to
Linux system calls can lead a supposedly protected
process to act against its interests, and even to
undertake arbitrary computation at the malicious
kernel's behest. Iago attacks are evidence that
protecting applications from malicious kernels is more
difficult than previously realized.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Hofmann:2013:ISA,
author = "Owen S. Hofmann and Sangman Kim and Alan M. Dunn and
Michael Z. Lee and Emmett Witchel",
title = "{InkTag}: secure applications on an untrusted
operating system",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "265--278",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451146",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "InkTag is a virtualization-based architecture that
gives strong safety guarantees to high-assurance
processes even in the presence of a malicious operating
system. InkTag advances the state of the art in
untrusted operating systems in both the design of its
hypervisor and in the ability to run useful
applications without trusting the operating system. We
introduce paraverification, a technique that simplifies
the InkTag hypervisor by forcing the untrusted
operating system to participate in its own
verification. Attribute-based access control allows
trusted applications to create decentralized access
control policies. InkTag is also the first system of
its kind to ensure consistency between secure data and
metadata, ensuring recoverability in the face of system
crashes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Giuffrida:2013:SAL,
author = "Cristiano Giuffrida and Anton Kuijsten and Andrew S.
Tanenbaum",
title = "Safe and automatic live update for operating systems",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "279--292",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451147",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Increasingly many systems have to run all the time
with no downtime allowed. Consider, for example,
systems controlling electric power plants and e-banking
servers. Nevertheless, security patches and a constant
stream of new operating system versions need to be
deployed without stopping running programs. These
factors naturally lead to a pressing demand for live
update---upgrading all or parts of the operating system
without rebooting. Unfortunately, existing solutions
require significant manual intervention and thus work
reliably only for small operating system patches. In
this paper, we describe an automated system for live
update that can safely and automatically handle major
upgrades without rebooting. We have implemented our
ideas in Proteos, a new research OS designed with live
update in mind. Proteos relies on system support and
nonintrusive instrumentation to handle even very
complex updates with minimal manual effort. The key
novelty is the idea of state quiescence, which allows
updates to happen only in safe and predictable system
states. A second novelty is the ability to
automatically perform transactional live updates at the
process level, ensuring a safe and stable update
process. Unlike prior solutions, Proteos supports
automated state transfer, state checking, and hot
rollback. We have evaluated Proteos on 50 real updates
and on novel live update scenarios. The results show
that our techniques can effectively support both simple
and complex updates, while outperforming prior
solutions in terms of flexibility, security,
reliability, and stability of the update process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Mai:2013:VSI,
author = "Haohui Mai and Edgar Pek and Hui Xue and Samuel
Talmadge King and Parthasarathy Madhusudan",
title = "Verifying security invariants in {ExpressOS}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "293--304",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451148",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Security for applications running on mobile devices is
important. In this paper we present ExpressOS, a new OS
for enabling high-assurance applications to run on
commodity mobile devices securely. Our main
contributions are a new OS architecture and our use of
formal methods for proving key security invariants
about our implementation. In our use of formal methods,
we focus solely on proving that our OS implements our
security invariants correctly, rather than striving for
full functional correctness, requiring significantly
less verification effort while still proving the
security relevant aspects of our system. We built
ExpressOS, analyzed its security, and tested its
performance. Our evaluation shows that the performance
of ExpressOS is comparable to an Android-based system.
In one test, we ran the same web browser on ExpressOS
and on an Android-based system, and found that
ExpressOS adds 16\% overhead on average to the page
load latency time for nine popular web sites.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Schkufza:2013:SS,
author = "Eric Schkufza and Rahul Sharma and Alex Aiken",
title = "Stochastic superoptimization",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "305--316",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451150",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We formulate the loop-free binary superoptimization
task as a stochastic search problem. The competing
constraints of transformation correctness and
performance improvement are encoded as terms in a cost
function, and a Markov Chain Monte Carlo sampler is
used to rapidly explore the space of all possible
programs to find one that is an optimization of a given
target program. Although our method sacrifices
completeness, the scope of programs we are able to
consider, and the resulting quality of the programs
that we produce, far exceed those of existing
superoptimizers. Beginning from binaries compiled by
llvm -O0 for 64-bit x86, our prototype implementation,
STOKE, is able to produce programs which either match
or outperform the code produced by gcc -O3, icc -O3,
and in some cases, expert handwritten assembly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Schulte:2013:ARB,
author = "Eric Schulte and Jonathan DiLorenzo and Westley Weimer
and Stephanie Forrest",
title = "Automated repair of binary and assembly programs for
cooperating embedded devices",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "317--328",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451151",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present a method for automatically repairing
arbitrary software defects in embedded systems, which
have limited memory, disk and CPU capacities, but exist
in great numbers. We extend evolutionary computation
(EC) algorithms that search for valid repairs at the
source code level to assembly and ELF format binaries,
compensating for limited system resources with several
algorithmic innovations. Our method does not require
access to the source code or build toolchain of the
software under repair, does not require program
instrumentation, specialized execution environments, or
virtual machines, or prior knowledge of the bug type.
We repair defects in ARM and x86 assembly as well as
ELF binaries, observing decreases of 86\% in memory and
95\% in disk requirements, with 62\% decrease in repair
time, compared to similar source-level techniques.
These advances allow repairs previously possible only
with C source code to be applied to any ARM or x86
assembly or ELF executable. Efficiency gains are
achieved by introducing stochastic fault localization,
with much lower overhead than comparable deterministic
methods, and low-level program representations. When
distributed over multiple devices, our algorithm finds
repairs faster than predicted by naive parallelism.
Four devices using our approach are five times more
efficient than a single device because of our
collaboration model. The algorithm is implemented on
Nokia N900 smartphones, with inter-phone communication
fitting in 900 bytes sent in 7 SMS text messages per
device per repair on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Cui:2013:VSR,
author = "Heming Cui and Gang Hu and Jingyue Wu and Junfeng
Yang",
title = "Verifying systems rules using rule-directed symbolic
execution",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "329--342",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451152",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Systems code must obey many rules, such as ``opened
files must be closed.'' One approach to verifying rules
is static analysis, but this technique cannot infer
precise runtime effects of code, often emitting many
false positives. An alternative is symbolic execution,
a technique that verifies program paths over all inputs
up to a bounded size. However, when applied to verify
rules, existing symbolic execution systems often
blindly explore many redundant program paths while
missing relevant ones that may contain bugs. Our key
insight is that only a small portion of paths are
relevant to rules, and the rest (majority) of paths are
irrelevant and do not need to be verified. Based on
this insight, we create WOODPECKER, a new symbolic
execution system for effectively checking rules on
systems programs. It provides a set of builtin checkers
for common rules, and an interface for users to easily
check new rules. It directs symbolic execution toward
the program paths relevant to a checked rule, and
soundly prunes redundant paths, exponentially speeding
up symbolic execution. It is designed to be
heuristic-agnostic, enabling users to leverage existing
powerful search heuristics. Evaluation on 136 systems
programs totaling 545K lines of code, including some of
the most widely used programs, shows that, with a time
limit of typically just one hour for each verification
run, WOODPECKER effectively verifies 28.7\% of the
program and rule combinations over bounded input,
whereas an existing symbolic execution system KLEE
verifies only 8.5\%. For the remaining combinations,
WOODPECKER verifies 4.6 times as many relevant paths as
KLEE. With a longer time limit, WOODPECKER verifies
much more paths than KLEE, e.g., 17 times as many with
a four-hour limit. WOODPECKER detects 113 rule
violations, including 10 serious data loss errors with
2 most serious ones already confirmed by the
corresponding developers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Xiang:2013:HHO,
author = "Xiaoya Xiang and Chen Ding and Hao Luo and Bin Bao",
title = "{HOTL}: a higher order theory of locality",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "343--356",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451153",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The locality metrics are many, for example, miss ratio
to test performance, data footprint to manage cache
sharing, and reuse distance to analyze and optimize a
program. It is unclear how different metrics are
related, whether one subsumes another, and what
combination may represent locality completely. This
paper first derives a set of formulas to convert
between five locality metrics and gives the condition
for correctness. The transformation is analogous to
differentiation and integration used to convert between
higher order polynomials. As a result, these metrics
can be assigned an order and organized into a
hierarchy. Using the new theory, the paper then
develops two techniques: one measures the locality in
real time without special hardware support, and the
other predicts multicore cache interference without
parallel testing. The paper evaluates them using
sequential and parallel programs as well as for a
parallel mix of sequential programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Kang:2013:HPP,
author = "Hui Kang and Jennifer L. Wong",
title = "To hardware prefetch or not to prefetch?: a
virtualized environment study and core binding
approach",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "357--368",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451155",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Most hardware and software vendors suggest disabling
hardware prefetching in virtualized environments. They
claim that prefetching is detrimental to application
performance due to inaccurate prediction caused by
workload diversity and VM interference on shared cache.
However, no comprehensive or quantitative measurements
to support this belief have been performed. This paper
is the first to systematically measure the influence of
hardware prefetching in virtualized environments. We
examine a wide variety of benchmarks on three types of
chip-multiprocessors (CMPs) to analyze the hardware
prefetching performance. We conduct extensive
experiments by taking into account a number of
important virtualization factors. We find that hardware
prefetching has minimal destructive influence under
most configurations. Only with certain application
combinations does prefetching influence the overall
performance. To leverage these findings and make
hardware prefetching effective across a diversity of
virtualized environments, we propose a dynamic
prefetching-aware VCPU-core binding approach (PAVCB),
which includes two phases --- classifying and binding.
The workload of each VM is classified into different
cache sharing constraint categories based upon its
cache access characteristics, considering both prefetch
requests and demand requests. Then following heuristic
rules, the VCPUs of each VM are scheduled onto
appropriate cores subject to cache sharing constraints.
We show that the proposed approach can improve
performance by 12\% on average over the default
scheduler and 46\% over manual system administrator
bindings across different workload combinations in the
presence of hardware prefetching.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Kim:2013:DBC,
author = "Hwanju Kim and Sangwook Kim and Jinkyu Jeong and
Joonwon Lee and Seungryoul Maeng",
title = "Demand-based coordinated scheduling for {SMP VMs}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "1",
pages = "369--380",
month = mar,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490301.2451156",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:40:49 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As processor architectures have been enhancing their
computing capacity by increasing core counts,
independent workloads can be consolidated on a single
node for the sake of high resource efficiency in data
centers. With the prevalence of virtualization
technology, each individual workload can be hosted on a
virtual machine for strong isolation between co-located
workloads. Along with this trend, hosted applications
have increasingly been multithreaded to take advantage
of improved hardware parallelism. Although the
performance of many multithreaded applications highly
depends on communication (or synchronization) latency,
existing schemes of virtual machine scheduling do not
explicitly coordinate virtual CPUs based on their
communication behaviors. This paper presents a
demand-based coordinated scheduling scheme for
consolidated virtual machines that host multithreaded
workloads. To this end, we propose communication-driven
scheduling that controls time-sharing in response to
inter-processor interrupts (IPIs) between virtual CPUs.
On the basis of in-depth analysis on the relationship
between IPI communications and coordination demands, we
devise IPI-driven coscheduling and delayed preemption
schemes, which effectively reduce synchronization
latency and unnecessary CPU consumption. In addition,
we introduce a load-conscious CPU allocation policy in
order to address load imbalance in heterogeneously
consolidated environments. The proposed schemes are
evaluated with respect to various scenarios of mixed
workloads using the PARSEC multithreaded applications.
In the evaluation, our scheme improves the overall
performance of consolidated workloads, especially
communication-intensive applications, by reducing
inefficient synchronization latency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Dashti:2013:TMH,
  author =       "Mohammad Dashti and Alexandra Fedorova and Justin
                 Funston and Fabien Gaud and Renaud Lachaize and
                 Baptiste Lepers and Vivien Qu{\'e}ma and Mark Roth",
  title =        "Traffic management: a holistic approach to memory
                 placement on {NUMA} systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "381--394",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451157",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "NUMA systems are characterized by Non-Uniform Memory
                 Access times, where accessing data in a remote node
                 takes longer than a local access. NUMA hardware has
                 been built since the late 80's, and the operating
                 systems designed for it were optimized for access
                 locality. They co-located memory pages with the threads
                 that accessed them, so as to avoid the cost of remote
                 accesses. Contrary to older systems, modern NUMA
                 hardware has much smaller remote wire delays, and so
                 remote access costs per se are not the main concern for
                 performance, as we discovered in this work. Instead,
                 congestion on memory controllers and interconnects,
                 caused by memory traffic from data-intensive
                 applications, hurts performance a lot more. Because of
                 that, memory placement algorithms must be redesigned to
                 target traffic congestion. This requires an arsenal of
                 techniques that go beyond optimizing locality. In this
                 paper we describe Carrefour, an algorithm that
                 addresses this goal. We implemented Carrefour in Linux
                 and obtained performance improvements of up to $ 3.6
                 \times $ relative to the default kernel, as well as
                 significant improvements compared to NUMA-aware
                 patchsets available for Linux. Carrefour never hurts
                 performance by more than 4\% when memory placement
                 cannot be improved. We present the design of Carrefour,
                 the challenges of implementing it on modern hardware,
                 and draw insights about hardware support that would
                 help optimize system software on future NUMA systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Jog:2013:OCT,
  author =       "Adwait Jog and Onur Kayiran and Nachiappan Chidambaram
                 Nachiappan and Asit K. Mishra and Mahmut T. Kandemir
                 and Onur Mutlu and Ravishankar Iyer and Chita R. Das",
  title =        "{OWL}: cooperative thread array aware scheduling
                 techniques for improving {GPGPU} performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "395--406",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451158",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Emerging GPGPU architectures, along with programming
                 models like CUDA and OpenCL, offer a cost-effective
                 platform for many applications by providing high thread
                 level parallelism at lower energy budgets.
                 Unfortunately, for many general-purpose applications,
                 available hardware resources of a GPGPU are not
                 efficiently utilized, leading to lost opportunity in
                 improving performance. A major cause of this is the
                 inefficiency of current warp scheduling policies in
                 tolerating long memory latencies. In this paper, we
                 identify that the scheduling decisions made by such
                 policies are agnostic to thread-block, or cooperative
                 thread array (CTA), behavior, and as a result
                 inefficient. We present a coordinated CTA-aware
                 scheduling policy that utilizes four schemes to
                 minimize the impact of long memory latencies. The first
                 two schemes, CTA-aware two-level warp scheduling and
                 locality aware warp scheduling, enhance per-core
                 performance by effectively reducing cache contention
                 and improving latency hiding capability. The third
                 scheme, bank-level parallelism aware warp scheduling,
                 improves overall GPGPU performance by enhancing DRAM
                 bank-level parallelism. The fourth scheme employs
                 opportunistic memory-side prefetching to further
                 enhance performance by taking advantage of open DRAM
                 rows. Evaluations on a 28-core GPGPU platform with
                 highly memory-intensive applications indicate that our
                 proposed mechanism can provide 33\% average performance
                 improvement compared to the commonly-employed
                 round-robin warp scheduling policy.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Pai:2013:IGC,
  author =       "Sreepathi Pai and Matthew J. Thazhuthaveetil and R.
                 Govindarajan",
  title =        "Improving {GPGPU} concurrency with elastic kernels",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "407--418",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451160",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Each new generation of GPUs vastly increases the
                 resources available to GPGPU programs. GPU programming
                 models (like CUDA) were designed to scale to use these
                 resources. However, we find that CUDA programs actually
                 do not scale to utilize all available resources, with
                 over 30\% of resources going unused on average for
                 programs of the Parboil2 suite that we used in our
                 work. Current GPUs therefore allow concurrent execution
                 of kernels to improve utilization. In this work, we
                 study concurrent execution of GPU kernels using
                 multiprogram workloads on current NVIDIA Fermi GPUs. On
                 two-program workloads from the Parboil2 benchmark suite
                 we find concurrent execution is often no better than
                 serialized execution. We identify that the lack of
                 control over resource allocation to kernels is a major
                 serialization bottleneck. We propose transformations
                 that convert CUDA kernels into elastic kernels which
                 permit fine-grained control over their resource usage.
                 We then propose several elastic-kernel aware
                 concurrency policies that offer significantly better
                 performance and concurrency compared to the current
                 CUDA policy. We evaluate our proposals on real hardware
                 using multiprogrammed workloads constructed from
                 benchmarks in the Parboil 2 suite. On average, our
                 proposals increase system throughput (STP) by 1.21x and
                 improve the average normalized turnaround time (ANTT)
                 by 3.73x for two-program workloads when compared to the
                 current CUDA concurrency implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Oh:2013:PAL,
  author =       "Taewook Oh and Hanjun Kim and Nick P. Johnson and Jae
                 W. Lee and David I. August",
  title =        "Practical automatic loop specialization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "419--430",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451161",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Program specialization optimizes a program with
                 respect to program invariants, including known, fixed
                 inputs. These invariants can be used to enable
                 optimizations that are otherwise unsound. In many
                 applications, a program input induces predictable
                 patterns of values across loop iterations, yet existing
                 specializers cannot fully capitalize on this
                 opportunity. To address this limitation, we present
                 Invariant-induced Pattern based Loop Specialization
                 (IPLS), the first fully-automatic specialization
                 technique designed for everyday use on real
                 applications. Using dynamic information-flow tracking,
                 IPLS profiles the values of instructions that depend
                 solely on invariants and recognizes repeating patterns
                 across multiple iterations of hot loops. IPLS then
                 specializes these loops, using those patterns to
                 predict values across a large window of loop
                 iterations. This enables aggressive optimization of the
                 loop; conceptually, this optimization reconstructs
                 recurring patterns induced by the input as concrete
                 loops in the specialized binary. IPLS specializes
                 real-world programs that prior techniques fail to
                 specialize without requiring hints from the user.
                 Experiments demonstrate a geomean speedup of 14.1\%
                 with a maximum speedup of 138\% over the original codes
                 when evaluated on three script interpreters and eleven
                 scripts each.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Phothilimthana:2013:PPH,
  author =       "Phitchaya Mangpo Phothilimthana and Jason Ansel and
                 Jonathan Ragan-Kelley and Saman Amarasinghe",
  title =        "Portable performance on heterogeneous architectures",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "431--444",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451162",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Trends in both consumer and high performance computing
                 are bringing not only more cores, but also increased
                 heterogeneity among the computational resources within
                 a single machine. In many machines, one of the greatest
                 computational resources is now their graphics
                 coprocessors (GPUs), not just their primary CPUs. But
                 GPU programming and memory models differ dramatically
                 from conventional CPUs, and the relative performance
                 characteristics of the different processors vary widely
                 between machines. Different processors within a system
                 often perform best with different algorithms and memory
                 usage patterns, and achieving the best overall
                 performance may require mapping portions of programs
                 across all types of resources in the machine. To
                 address the problem of efficiently programming machines
                 with increasingly heterogeneous computational
                 resources, we propose a programming model in which the
                 best mapping of programs to processors and memories is
                 determined empirically. Programs define choices in how
                 their individual algorithms may work, and the compiler
                 generates further choices in how they can map to CPU
                 and GPU processors and memory systems. These choices
                 are given to an empirical autotuning framework that
                 allows the space of possible implementations to be
                 searched at installation time. The rich choice space
                 allows the autotuner to construct poly-algorithms that
                 combine many different algorithmic techniques, using
                 both the CPU and the GPU, to obtain better performance
                 than any one technique alone. Experimental results show
                 that algorithmic changes, and the varied use of both
                 CPUs and GPUs, are necessary to obtain up to a 16.5x
                 speedup over using a single program configuration for
                 all architectures.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Mittal:2013:EVE,
  author =       "Aashish Mittal and Dushyant Bansal and Sorav Bansal
                 and Varun Sethi",
  title =        "Efficient virtualization on embedded {Power
                 Architecture\reg} platforms",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "445--458",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451163",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Power Architecture\reg{} processors are popular and
                 widespread on embedded systems, and such platforms are
                 increasingly being used to run virtual machines. While
                 the Power Architecture meets the Popek-and-Goldberg
                 virtualization requirements for traditional
                 trap-and-emulate style virtualization, the performance
                 overhead of virtualization remains high. For example,
                 workloads exhibiting a large amount of kernel activity
                 typically show 3--5x slowdowns over bare-metal. Recent
                 additions to the Linux kernel contain guest and host
                 side paravirtual extensions for Power Architecture
                 platforms. While these extensions improve performance
                 significantly, they are guest-specific,
                 guest-intrusive, and cover only a subset of all
                 possible virtualization optimizations. We present a set
                 of host-side optimizations that achieve comparable
                 performance to the aforementioned paravirtual
                 extensions, on an unmodified guest. Our optimizations
                 are based on adaptive in-place binary translation.
                 Unlike the paravirtual approach, our solution is guest
                 neutral. We implement our ideas in a prototype based on
                 Qemu/KVM. After our modifications, KVM can boot an
                 unmodified Linux guest around 2.5x faster. We contrast
                 our optimization approach with previous similar binary
                 translation based approaches for the x86 architecture;
                 in our experience, each architecture presents a unique
                 set of challenges and optimization opportunities.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Hill:2013:RDC,
  author =       "Mark D. Hill",
  title =        "Research directions for 21st century computer systems:
                 {ASPLOS 2013} panel",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "459--460",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451165",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Four recent efforts call out architectural challenges
                 and opportunities up and down the software/hardware
                 stack. This panel will discuss, ``What should the
                 community do to facilitate, transcend, or refute these
                 partially overlapping visions?'' The panel is chaired
                 by Mark D. Hill with other panel members not finalized
                 for the ASPLOS'13 proceedings.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Madhavapeddy:2013:ULO,
  author =       "Anil Madhavapeddy and Richard Mortier and Charalampos
                 Rotsos and David Scott and Balraj Singh and Thomas
                 Gazagnaire and Steven Smith and Steven Hand and Jon
                 Crowcroft",
  title =        "Unikernels: library operating systems for the cloud",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "461--472",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451167",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We present unikernels, a new approach to deploying
                 cloud services via applications written in high-level
                 source code. Unikernels are single-purpose appliances
                 that are compile-time specialised into standalone
                 kernels, and sealed against modification when deployed
                 to a cloud platform. In return they offer significant
                 reduction in image sizes, improved efficiency and
                 security, and should reduce operational costs. Our
                 Mirage prototype compiles OCaml code into unikernels
                 that run on commodity clouds and offer an order of
                 magnitude reduction in code size without significant
                 performance penalty. The architecture combines static
                 type-safety with a single address-space layout that can
                 be made immutable via a hypervisor extension. Mirage
                 contributes a suite of type-safe protocol libraries,
                 and our results demonstrate that the hypervisor is a
                 platform that overcomes the hardware compatibility
                 issues that have made past library operating systems
                 impractical to deploy in the real-world.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Kadav:2013:FGF,
  author =       "Asim Kadav and Matthew J. Renzelmann and Michael M.
                 Swift",
  title =        "Fine-grained fault tolerance using device
                 checkpoints",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "473--484",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451168",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Recovering faults in drivers is difficult compared to
                 other code because their state is spread across both
                 memory and a device. Existing driver fault-tolerance
                 mechanisms either restart the driver and discard its
                 state, which can break applications, or require an
                 extensive logging mechanism to replay requests and
                 recreate driver state. Even logging may be
                 insufficient, though, if the semantics of requests are
                 ambiguous. In addition, these systems either require
                 large subsystems that must be kept up-to-date as the
                 kernel changes, or require substantial rewriting of
                 drivers. We present a new driver fault-tolerance
                 mechanism that provides fine-grained control over the
                 code protected. Fine-Grained Fault Tolerance (FGFT)
                 isolates driver code at the granularity of a single
                 entry point. It executes driver code as a transaction,
                 allowing roll back if the driver fails. We develop a
                 novel checkpointing mechanism to save and restore
                 device state using existing power management code.
                 Unlike past systems, FGFT can be incrementally deployed
                 in a single driver without the need for a large kernel
                 subsystem, but at the cost of small modifications to
                 the driver. In the evaluation, we show that FGFT can
                 have almost zero runtime cost in many cases, and that
                 checkpoint-based recovery can reduce the duration of a
                 failure by 79\% compared to restarting the driver.
                 Finally, we show that applying FGFT to a driver
                 requires little effort, and the majority of drivers in
                 common classes already contain the power-management
                 code needed for checkpoint/restore.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Silberstein:2013:GIF,
  author =       "Mark Silberstein and Bryan Ford and Idit Keidar and
                 Emmett Witchel",
  title =        "{GPUfs}: integrating a file system with {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "485--498",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451169",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "GPU hardware is becoming increasingly general purpose,
                 quickly outgrowing the traditional but constrained
                 GPU-as-coprocessor programming model. To make GPUs
                 easier to program and easier to integrate with existing
                 systems, we propose making the host's file system
                 directly accessible from GPU code. GPUfs provides a
                 POSIX-like API for GPU programs, exploits GPU
                 parallelism for efficiency, and optimizes GPU file
                 access by extending the buffer cache into GPU memory.
                 Our experiments, based on a set of real benchmarks
                 adopted to use our file system, demonstrate the
                 feasibility and benefits of our approach. For example,
                 we demonstrate a simple self-contained GPU program
                 which searches for a set of strings in the entire tree
                 of Linux kernel source files over seven times faster
                 than an eight-core CPU run.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Hunt:2013:DTN,
  author =       "Nicholas Hunt and Tom Bergan and Luis Ceze and Steven
                 D. Gribble",
  title =        "{DDOS}: taming nondeterminism in distributed systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "499--508",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451170",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Nondeterminism complicates the development and
                 management of distributed systems, and arises from two
                 main sources: the local behavior of each individual
                 node as well as the behavior of the network connecting
                 them. Taming nondeterminism effectively requires
                 dealing with both sources. This paper proposes DDOS, a
                 system that leverages prior work on deterministic
                 multithreading to offer: (1) space-efficient
                 record/replay of distributed systems; and (2) fully
                 deterministic distributed behavior. Leveraging
                 deterministic behavior at each node makes outgoing
                 messages strictly a function of explicit inputs. This
                 allows us to record the system by logging just
                 message's arrival time, not the contents. Going
                 further, we propose and implement an algorithm that
                 makes all communication between nodes deterministic by
                 scheduling communication onto a global logical
                 timeline. We implement both algorithms in a system
                 called DDOS and evaluate our system with parallel
                 scientific applications, an HTTP/memcached system and a
                 distributed microbenchmark with a high volume of
                 peer-to-peer communication. Our results show up to two
                 orders of magnitude reduction in log size of
                 record/replay, and that distributed systems can be made
                 deterministic with an order of magnitude of overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Wang:2013:TEH,
  author =       "Cheng Wang and Youfeng Wu",
  title =        "{TSO\_ATOMICITY}: efficient hardware primitive for
                 {TSO}-preserving region optimizations",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "509--520",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451172",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Program optimizations based on data dependences may
                 not preserve the memory consistency in the programs.
                 Previous works leverage a hardware ATOMICITY primitive
                 to restrict the thread interleaving for preserving
                 sequential consistency in region optimizations.
                 However, ATOMICITY primitive is over restrictive on the
                 thread interleaving for optimizing real-world
                 applications developed with the popular
                 Total-Store-Ordering (TSO) memory consistency, which is
                 weaker than sequential consistency. In this paper, we
                 present a novel hardware TSO\_ATOMICITY primitive,
                 which has less restriction on the thread interleaving
                 than ATOMICITY primitive to permit more efficient
                 program execution than ATOMICITY primitive, but can
                 still preserve TSO memory consistency in all region
                 optimizations. Furthermore, TSO\_ATOMICITY primitive
                 requires similar architecture support as ATOMICITY
                 primitive and can be implemented with only slight
                 change to the existing ATOMICITY primitive
                 implementation. Our experimental results show that in a
                 start-of-art dynamic binary optimization system on a
                 large set of workloads, ATOMICITY primitive can only
                 improve the performance by 4\% on average.
                 TSO\_ATOMICITY primitive can reduce the overhead
                 associated with ATOMICITY primitive and improve the
                 performance by 12\% on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Jafri:2013:WGI,
  author =       "Syed Ali Raza Jafri and Gwendolyn Voskuilen and T. N.
                 Vijaykumar",
  title =        "{Wait-n-GoTM}: improving {HTM} performance by
                 serializing cyclic dependencies",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "521--534",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451173",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Transactional memory (TM) has been proposed to
                 alleviate some key programmability problems in chip
                 multiprocessors. Most TMs optimistically allow
                 concurrent transactions, detecting read-write or
                 write-write conflicts. Upon conflicts, existing
                 hardware TMs (HTMs) use one of three
                 conflict-resolution policies: (1) always-abort, (2)
                 always-wait for some conflicting transactions to
                 complete, or (3) always-go past conflicts and resolve
                 acyclic conflicts at commit or abort upon cyclic
                 dependencies. While each policy has advantages, the
                 policies degrade performance under contention by
                 limiting concurrency (always-abort, always-wait) or
                 incurring late aborts due to cyclic dependencies
                 (always-go). Thus, while always-go avoids acyclic
                 aborts, no policy avoids cyclic aborts. We propose
                 Wait-n-GoTM (WnGTM) to increase concurrency while
                 avoiding cyclic aborts. We observe that most cyclic
                 dependencies are caused by threads interleaving
                 multiple accesses to a few heavily-read-write-shared
                 delinquent data cache blocks. These accesses occur in
                 code sections called cycle inducer sections (CISTs).
                 Accordingly, we propose Wait-n-Go (WnG)
                 conflict-resolution to avoid many cyclic aborts by
                 predicting and serializing the CISTs. To support the
                 WnG policy, we extend previous HTMs to (1) allow
                 multiple readers and writers, (2) scalably identify
                 dependencies, and (3) detect cyclic dependencies via
                 new mechanisms, namely, conflict transactional state,
                 order-capture, and hardware timestamps, respectively.
                 In 16-core simulations of STAMP, WnGTM achieves average
                 speedups of 46\% for higher-contention benchmarks and
                 28\% for all benchmarks over always-abort (TokenTM)
                 with low-contention benchmarks remaining unchanged,
                 compared to always-go (DATM) and always-wait
                 (LogTM-SE), which perform worse than and 6\% better
                 than TokenTM, respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Qian:2013:VSP,
  author =       "Xuehai Qian and Josep Torrellas and Benjamin Sahelices
                 and Depei Qian",
  title =        "Volition: scalable and precise sequential consistency
                 violation detection",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "535--548",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451174",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Sequential Consistency (SC) is the most intuitive
                 memory model, and SC Violations (SCVs) produce
                 unintuitive, typically incorrect executions. Most prior
                 SCV detection schemes have used data races as proxies
                 for SCVs, which is highly imprecise. Other schemes that
                 have targeted data-race cycles are either too
                 conservative or are designed only for two-processor
                 cycles and snoopy-based systems. This paper presents
                 Volition, the first hardware scheme that detects SCVs
                 in a relaxed-consistency machine precisely, in a
                 scalable manner, and for an arbitrary number of
                 processors in the cycle. Volition leverages cache
                 coherence protocol transactions to dynamically detect
                 cycles in memory-access orders across threads. When a
                 cycle is about to occur, an exception is triggered.
                 Volition can be used in both directory- and
                 snoopy-based coherence protocols. Our simulations of
                 Volition in a 64-processor multicore with
                 directory-based coherence running SPLASH-2 and Parsec
                 programs shows that Volition induces negligible traffic
                 and execution overhead. In addition, it can detect SCVs
                 with several processors. Volition is suitable for
                 on-the-fly use.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Grossman:2013:HSF,
  author =       "J. P. Grossman and Jeffrey S. Kuskin and Joseph A.
                 Bank and Michael Theobald and Ron O. Dror and Douglas
                 J. Ierardi and Richard H. Larson and U. Ben Schafer and
                 Brian Towles and Cliff Young and David E. Shaw",
  title =        "Hardware support for fine-grained event-driven
                 computation in {Anton 2}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "1",
  pages =        "549--560",
  month =        mar,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490301.2451175",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:40:49 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Exploiting parallelism to accelerate a computation
                 typically involves dividing it into many small tasks
                 that can be assigned to different processing elements.
                 An efficient execution schedule for these tasks can be
                 difficult or impossible to determine in advance,
                 however, if there is uncertainty as to when each task's
                 input data will be available. Ideally, each task would
                 run in direct response to the arrival of its input
                 data, thus allowing the computation to proceed in a
                 fine-grained event-driven manner. Realizing this ideal
                 is difficult in practice, and typically requires
                 sacrificing flexibility for performance. In Anton 2, a
                 massively parallel special-purpose supercomputer for
                 molecular dynamics simulations, we addressed this
                 challenge by including a hardware block, called the
                 dispatch unit, that provides flexible and efficient
                 support for fine-grained event-driven computation. Its
                 novel features include a many-to-many mapping from
                 input data to a set of synchronization counters, and
                 the ability to prioritize tasks based on their type. To
                 solve the additional problem of using a fixed set of
                 synchronization counters to track input data for a
                 potentially large number of tasks, we created a
                 software library that allows programmers to treat Anton
                 2 as an idealized machine with infinitely many
                 synchronization counters. The dispatch unit, together
                 with this library, made it possible to simplify our
                 molecular dynamics software by expressing it as a
                 collection of independent tasks, and the resulting
                 fine-grained execution schedule improved overall
                 performance by up to 16\% relative to a coarse-grained
                 schedule for precisely the same computation.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Sinha:2013:NRA,
  author =       "Amitabha Sinha and Mitrava Sarkar and Soumojit
                 Acharyya and Suranjan Chakraborty",
  title =        "A novel reconfigurable architecture of a {DSP}
                 processor for efficient mapping of {DSP} functions
                 using field programmable {DSP} arrays",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "41",
  number =       "2",
  pages =        "1--8",
  month =        may,
  year =         "2013",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2490302.2490304",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Sat Jun 1 11:00:26 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Development of modern integrated circuit technologies
                 makes it feasible to develop cheaper, faster and
                 smaller special purpose signal processing function
                 circuits. Digital Signal processing functions are
                 generally implemented either on ASICs with
                 inflexibility, or on FPGAs with bottlenecks of
                 relatively smaller utilization factor or lower speed
                 compared to ASIC. Field Programmable DSP Array (FPDA)
                 is the proposed DSP dedicated device, redolent to FPGA,
                 but with basic fixed common modules (CMs) (like adders,
                 subtractors, multipliers, scaling units, shifters)
                 instead of CLBs. This paper introduces the development
                 of reconfigurable system architecture with a focus on
                 FPDA that integrates different DSP functions like DFT,
                 FFT, DCT, FIR, IIR, and DWT etc. The switching between
                 DSP functions is occurred by reconfiguring the
                 interconnection between CMs. Validation of the proposed
                 architecture has been achieved on Virtex5 FPGA. The
                 architecture provides sufficient amount of flexibility,
                 parallelism and scalability.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Saha:2013:PAF,
author = "Amrita Saha and Manideepa Mukherjee and Debanjana
Datta and Sangita Saha and Amitabha Sinha",
title = "Performance analysis of a {FPGA} based novel binary
and {DBNS} multiplier",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "9--16",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490305",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Designing high performance Software Defined Radio
(SDR) with low power and flexibility is a major
challenge. While the high performance DSP processors
are unable to meet the speed requirements of these
SDRs, System on chips (SOCs) are also not suitable
because of their limited flexibility. Recently
dynamically reconfigurable FPGAs have emerged as high
performance programmable hardware to execute highly
parallel, computationally intensive signal processing
functions efficiently. Since basic intention of an SDR
is to implement different modulation / demodulation
schemes and basic building blocks for such schemes are
signal processing functions, FPGAs have become an
important component for implementing these. However,
the effectiveness of such an approach with respect to
cost, performance and flexibility need to be examined.
Double Base Number Systems (DBNS) have been gaining
attention for compute intensive applications in signal
processing because of their higher performance in
arithmetic operations in general and particularly
multiplication. Keeping these issues in view, this
paper aims to present a new Software defined Radio. To
enhance the performance of the proposed architecture,
analysis have been done employing both single index and
multiple indices DBNS multipliers. Experiments and
analysis on performance have also been done with its
binary counterpart. Both DBNS and binary based
architecture were implemented on Xilinx Virtex-IV FPGA
using Xilinx ISE 9.1i.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sartin-Tarm:2013:CCS,
author = "Michael Sartin-Tarm and Tony Nowatzki and Lorenzo {De
Carli} and Karthikeyan Sankaralingam and Cristian
Estan",
title = "Constraint centric scheduling guide",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "17--21",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490306",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The advent of architectures with software-exposed
resources (Spatial Architectures) has created a demand
for universally applicable scheduling techniques. This
paper describes our generalized spatial scheduling
framework, formulated with Integer Linear Programming,
and specifically accomplishes two goals. First, using
the ``Simple'' architecture, it illustrates how to use
our open-source tool to create a customized scheduler
and covers problem formulation with ILP and GAMS.
Second, it summarizes results on the application to
three real architectures (TRIPS, DySER, PLUG),
demonstrating the technique's practicality and
competitiveness with existing schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Guha:2013:SEW,
author = "Apala Guha and Yao Zhang and Raihan ur Rasool and
Andrew A. Chien",
title = "Systematic evaluation of workload clustering for
extremely energy-efficient architectures",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "22--29",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490307",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Chip power consumption has reached its limits, leading
to the flattening of single-core performance. We
propose the $10 \times 10$ processor, a federated heterogeneous
multi-core architecture, where each core is an ensemble
of u-engines (micro-engines, similar to accelerators)
specialized for different workload groups to achieve
dramatically higher energy efficiency. The u-engines
collectively target the entire general-purpose workload
space. The problem we study in this article is
selecting the set of workloads that each u-engine
should be customized for. For this problem we study the
computation structure of a wide variety of workloads
and cluster together workloads with similar computation
structures, the idea being that each u-engine will be
customized for the compute structures exhibited by a
particular cluster. The constraint on this problem is
the silicon budget of a processor. Lower silicon
budgets accommodate fewer u-engines and require
individual u-engines to target larger segments of the
workload space which leads to lower energy efficiency
benefits from customization, because there is more
variation among the compute structures making up each
cluster. Therefore, we also study how workload coverage
and benefit can be maximized for a given silicon
budget. We study a broad general-purpose workload that
includes 34 codes from 6 benchmark suites, identifying
the most frequent functions, and clustering them based
on two sets of instruction usage features
(high-resolution and low-resolution) into 8, 16, 32,
64, 128 clusters respectively. We develop abstract
metrics (coverage and weighted customization benefit)
to evaluate the clusters. We show significant potential
payoffs with four benefit models: 2-3x (square root
model), 4-10x (linear model), 12-24x (quadratic model),
and 22-26x (cubic model).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Saha:2013:IDP,
author = "Amrita Saha and Pijush Biswas and Amitabha Sinha",
title = "An integrated development platform of a reconfigurable
radio processor for software defined radio",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "30--35",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490308",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Performance required by ``Software Defined Radio
(SDR)'' poses many challenges in real-time applications
because of their high computational complexity.
Designing a high performance SDR with a high degree of
flexibility becomes an issue of importance. While the
fastest programmable DSP processors are unable to meet
the speed requirements for SDR, FPGAs also cannot offer
the highest possible performance at the lowest silicon
cost for a given signal processing function. Moreover,
they are not optimized for radio applications because
of their LUT based approach. To overcome the
limitations of both DSP Processor and FPGAs, Radio
Processor, a reconfigurable Processor optimized for
Radio applications was conceived [14], [17]. However,
advantages of this Radio Processor cannot be made
useful unless there is an integrated development
environment to develop SDR. This paper addresses these
issues by introducing a new Integrated Development
platform for reconfigurable ``Radio Processor'' for
implementing SDR.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pal:2013:FIN,
author = "Santanu Pal and Amitabha Sinha and Pijush Biswas",
title = "{FPGA} implementation of a novel {DCT} architecture
reducing constant cosine terms",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "36--40",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490309",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a new scalable architecture for
Discrete Cosine Transform (DCT). In contrast to the
conventional DCT architecture, the proposed
architecture reduces the number of constant cosine
terms using the matrix transposition and symmetry
property. This in turn, considerably reduces the
computation time. The architecture is scalable and it
can be extended to support any transform length. The
architecture was validated on Xilinx Virtex-4 FPGA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tseng:2013:NNE,
author = "Kuo-Kun Tseng and Fu-Fu Zeng and Huang-Nan Huang and
Yiming Liu and Jeng-Shyang Pan and W. H. Ip and C. H.
Wu",
title = "A new non-exact {Aho--Corasick} framework for {ECG}
classification",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "41--46",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490310",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The Aho--Corasick (AC) algorithm is a popular and
useful exact string matching algorithm for text
searching and deep packet inspection. However, it has
seldom been used for non-exact classification or
identification. We propose a novel framework to make
use of AC for non-exact matching in the ECG
identification. The AC classification (ACC) algorithm
converts ECG waveforms into several short patterns for
AC, and decides the identification result by AC matched
counting value. In our experiments, the results are
surprisingly good and superior to previous algorithms.
So, we designed an AC algorithm application for
non-exact classification with high accuracy. Meanwhile,
ACC inherits the advantage from AC of being capable of
handling a large pattern set with linear time
complexity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Maitra:2013:HPM,
author = "Subhashis Maitra and Amitabha Sinha",
title = "High performance {MAC} unit for {DSP} and
cryptographic applications",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "47--55",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490311",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Multiplication and addition are the basic arithmetic
operation used in Digital Signal Processing (DSP) for
coefficient multiplication, scalar point multiplication
in Elliptic Curve Cryptography (ECC) and in other
fields. Multiplications are basically a shift and add
operation. However, there are many different variations
on how to do it. Some are more suitable to implement on
FPGA than others. However time complexities and
hardware complexities are the major issues in designing
a multiplier unit. There are different multiplication
algorithms in current technology. Hardware complexities
in some design are more than time complexities whereas
in some other design time complexities are more.
However there must be a tradeoff between these two
types of methodology. This paper will discuss a brief
idea how a tradeoff can be achieved. Experimental
results that have discussed here and the architecture
based on the proposed algorithm shows its novelty.
Applications of the proposed algorithm on DSP and ECC
have been dealt here clearly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2013:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "2",
pages = "56--71",
month = may,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2490302.2490313",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jun 1 11:00:26 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Belhadj:2013:CRW,
author = "Bilel Belhadj and Antoine Joubert and Zheng Li and
Rodolphe H{\'e}liot and Olivier Temam",
title = "Continuous real-world inputs can open up alternative
accelerator designs",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "1--12",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485923",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Motivated by energy constraints, future heterogeneous
multi-cores may contain a variety of accelerators, each
targeting a subset of the application spectrum. Beyond
energy, the growing number of faults steers accelerator
research towards fault-tolerant accelerators. In this
article, we investigate a fault-tolerant and
energy-efficient accelerator for signal processing
applications. We depart from traditional designs by
introducing an accelerator which relies on unary
coding, a concept which is well adapted to the
continuous real-world inputs of signal processing
applications. Unary coding enables a number of atypical
micro-architecture choices which bring down area cost
and energy; moreover, unary coding provides graceful
output degradation as the amount of transient faults
increases. We introduce a configurable hybrid
digital/analog micro-architecture capable of
implementing a broad set of signal processing
applications based on these concepts, together with a
back-end optimizer which takes advantage of the special
nature of these applications. For a set of five signal
applications, we explore the different design tradeoffs
and obtain an accelerator with an area cost of 1.63
mm$^2$. On average, this accelerator requires only
2.3\% of the energy of an Atom-like core to implement
similar tasks. We then evaluate the accelerator
resilience to transient faults, and its ability to
trade accuracy for energy savings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Petrica:2013:FDA,
author = "Paula Petrica and Adam M. Izraelevitz and David H.
Albonesi and Christine A. Shoemaker",
title = "{Flicker}: a dynamically adaptive architecture for
power limited multicore systems",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "13--23",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485924",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Future microprocessors may become so power constrained
that not all transistors will be able to be powered on
at once. These systems will be required to nimbly adapt
to changes in the chip power that is allocated to
general-purpose cores and to specialized accelerators.
This paper presents Flicker, a general-purpose
multicore architecture that dynamically adapts to
varying and potentially stringent limits on allocated
power. The Flicker core microarchitecture includes
deconfigurable lanes --- horizontal slices through the
pipeline --- that permit tailoring an individual core
to the running application with lower overhead than
microarchitecture-level adaptation, and greater
flexibility than core-level power gating. To exploit
Flicker's flexible pipeline architecture, a new online
multicore optimization algorithm combines reduced
sampling techniques, application of response surface
models to online optimization, and heuristic online
search. The approach efficiently finds a
near-global-optimum configuration of lanes without
requiring offline training, microarchitecture state, or
foreknowledge of the workload. At high power
allocations, core-level gating is highly effective, and
slightly outperforms Flicker overall. However, under
stringent power constraints, Flicker significantly
outperforms core-level gating, achieving an average
27\% performance improvement.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Qadeer:2013:CEB,
author = "Wajahat Qadeer and Rehan Hameed and Ofer Shacham and
Preethi Venkatesan and Christos Kozyrakis and Mark A.
Horowitz",
title = "Convolution engine: balancing efficiency \&
flexibility in specialized computing",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "24--35",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485925",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "This paper focuses on the trade-off between
flexibility and efficiency in specialized computing. We
observe that specialized units achieve most of their
efficiency gains by tuning data storage and compute
structures and their connectivity to the data-flow and
data-locality patterns in the kernels. Hence, by
identifying key data-flow patterns used in a domain, we
can create efficient engines that can be programmed and
reused across a wide range of applications. We present
an example, the Convolution Engine (CE), specialized
for the convolution-like data-flow that is common in
computational photography, image processing, and video
processing applications. CE achieves energy efficiency
by capturing data reuse patterns, eliminating data
transfer overheads, and enabling a large number of
operations per memory access. We quantify the tradeoffs
in efficiency and flexibility and demonstrate that CE
is within a factor of 2-3x of the energy and area
efficiency of custom units optimized for a single
kernel. CE improves energy and area efficiency by 8-15x
over a SIMD engine for most applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Lim:2013:TSS,
author = "Kevin Lim and David Meisner and Ali G. Saidi and
Parthasarathy Ranganathan and Thomas F. Wenisch",
title = "Thin servers with smart pipes: designing {SoC}
accelerators for memcached",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "36--47",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485926",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Distributed in-memory key-value stores, such as
memcached, are central to the scalability of modern
internet services. Current deployments use commodity
servers with high-end processors. However, given the
cost-sensitivity of internet services and the recent
proliferation of volume low-power System-on-Chip (SoC)
designs, we see an opportunity for alternative
architectures. We undertake a detailed characterization
of memcached to reveal performance and power
inefficiencies. Our study considers both
high-performance and low-power CPUs and NICs across a
variety of carefully-designed benchmarks that exercise
the range of memcached behavior. We discover that,
regardless of CPU microarchitecture, memcached
execution is remarkably inefficient, saturating neither
network links nor available memory bandwidth. Instead,
we find performance is typically limited by the
per-packet processing overheads in the NIC and OS
kernel --- long code paths limit CPU performance due to
poor branch predictability and instruction fetch
bottlenecks. Our insights suggest that neither
high-performance nor low-power cores provide a
satisfactory power-performance trade-off, and point to
a need for tighter integration of the network
interface. Hence, we argue for an alternate
architecture --- Thin Servers with Smart Pipes (TSSP)
--- for cost-effective high-performance memcached
deployment. TSSP couples an embedded-class low-power
core to a memcached accelerator that can process GET
requests entirely in hardware, offloading both network
handling and data look up. We demonstrate the potential
benefits of our TSSP architecture through an FPGA
prototyping platform, and show the potential for a
6x--16x power-performance improvement over conventional
server baselines.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mukundan:2013:UMR,
author = "Janani Mukundan and Hillery Hunter and Kyu-hyoun Kim
and Jeffrey Stuecheli and Jos{\'e} F. Mart{\'\i}nez",
title = "Understanding and mitigating refresh overheads in
high-density {DDR4 DRAM} systems",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "48--59",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485927",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Recent DRAM specifications exhibit increasing refresh
latencies. A refresh command blocks a full rank,
decreasing available parallelism in the memory
subsystem significantly, thus decreasing performance.
Fine Granularity Refresh (FGR) is a feature recently
announced as part of JEDEC's DDR4 DRAM specification
that attempts to tackle this problem by creating a
range of refresh options that provide a trade-off
between refresh latency and frequency. In this paper,
we first conduct an analysis of DDR4 DRAM's FGR
feature, and show that there is no one-size-fits-all
option across a variety of applications. We then
present Adaptive Refresh (AR), a simple yet effective
mechanism that dynamically chooses the best FGR mode
for each application and phase within the application.
When looking at the refresh problem more closely, we
identify in high-density DRAM systems a phenomenon that
we call command queue seizure, whereby the memory
controller's command queue seizes up temporarily
because it is full with commands to a rank that is
being refreshed. To attack this problem, we propose two
complementary mechanisms called Delayed Command
Expansion (DCE) and Preemptive Command Drain (PCD). Our
results show that AR does exploit DDR4's FGR
effectively. However, once our proposed DCE and PCD
mechanisms are added, DDR4's FGR becomes redundant in
most cases, except in a few highly memory-sensitive
applications, where the use of AR does provide some
additional benefit. In all, our simulations show that
the proposed mechanisms yield 8\% (14\%) mean speedup
with respect to traditional refresh, at normal
(extended) DRAM operating temperatures, for a set of
diverse parallel applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:2013:ESD,
author = "Jamie Liu and Ben Jaiyen and Yoongu Kim and Chris
Wilkerson and Onur Mutlu",
title = "An experimental study of data retention behavior in
modern {DRAM} devices: implications for retention time
profiling mechanisms",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "60--71",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485928",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "DRAM cells store data in the form of charge on a
capacitor. This charge leaks off over time, eventually
causing data to be lost. To prevent this data loss from
occurring, DRAM cells must be periodically refreshed.
Unfortunately, DRAM refresh operations waste energy and
also degrade system performance by interfering with
memory requests. These problems are expected to worsen
as DRAM density increases. The amount of time that a
DRAM cell can safely retain data without being
refreshed is called the cell's retention time. In
current systems, all DRAM cells are refreshed at the
rate required to guarantee the integrity of the cell
with the shortest retention time, resulting in
unnecessary refreshes for cells with longer retention
times. Prior work has proposed to reduce unnecessary
refreshes by exploiting differences in retention time
among DRAM cells; however, such mechanisms require
knowledge of each cell's retention time. In this paper,
we present a comprehensive quantitative study of
retention behavior in modern DRAMs. Using a
temperature-controlled FPGA-based testing platform, we
collect retention time information from 248 commodity
DDR3 DRAM chips from five major DRAM vendors. We
observe two significant phenomena: data pattern
dependence, where the retention time of each DRAM cell
is significantly affected by the data stored in other
DRAM cells, and variable retention time, where the
retention time of some DRAM cells changes unpredictably
over time. We discuss possible physical explanations
for these phenomena, how their magnitude may be
affected by DRAM technology scaling, and their
ramifications for DRAM retention time profiling
mechanisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nair:2013:AAF,
author = "Prashant J. Nair and Dae-Hyun Kim and Moinuddin K.
Qureshi",
title = "{ArchShield}: architectural framework for assisting
{DRAM} scaling by tolerating high error rates",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "72--83",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485929",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "DRAM scaling has been the prime driver for increasing
the capacity of main memory system over the past three
decades. Unfortunately, scaling DRAM to smaller
technology nodes has become challenging due to the
inherent difficulty in designing smaller geometries,
coupled with the problems of device variation and
leakage. Future DRAM devices are likely to experience
significantly high error-rates. Techniques that can
tolerate errors efficiently can enable DRAM to scale to
smaller technology nodes. However, existing techniques
such as row/column sparing and ECC become prohibitive
at high error-rates. To develop cost-effective
solutions for tolerating high error-rates, this paper
advocates a cross-layer approach. Rather than hiding
the faulty cell information within the DRAM chips, we
expose it to the architectural level. We propose
ArchShield, an architectural framework that employs
runtime testing to identify faulty DRAM cells.
ArchShield tolerates these faults using two components,
a Fault Map that keeps information about faulty words
in a cache line, and Selective Word-Level Replication
(SWLR) that replicates faulty words for error
resilience. Both Fault Map and SWLR are integrated in
reserved area in DRAM memory. Our evaluations with 8GB
DRAM DIMM show that ArchShield can efficiently tolerate
error-rates as high as 10$^{-4}$ (100x higher than
ECC alone), causes less than 2\% performance
degradation, and still maintains 1-bit error tolerance
against soft errors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ghose:2013:IMS,
author = "Saugata Ghose and Hyodong Lee and Jos{\'e} F.
Mart{\'\i}nez",
title = "Improving memory scheduling via processor-side load
criticality information",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "84--95",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485930",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "We hypothesize that performing processor-side analysis
of load instructions, and providing this pre-digested
information to memory schedulers judiciously, can
increase the sophistication of memory decisions while
maintaining a lean memory controller that can take
scheduling actions quickly. This is increasingly
important as DRAM frequencies continue to increase
relative to processor speed. In this paper we propose
one such mechanism, pairing up a processor-side load
criticality predictor with a lean memory controller
that prioritizes load requests based on ranking
information supplied from the processor side. Using a
sophisticated multi-core simulator that includes a
detailed quad-channel DDR3 DRAM model, we demonstrate
that this mechanism can improve performance
significantly on a CMP, with minimal overhead and
virtually no changes to the processor itself. We show
that our design compares favorably to several
state-of-the-art schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Isci:2013:AEV,
author = "Canturk Isci and Suzanne McIntosh and Jeffrey Kephart
and Rajarshi Das and James Hanson and Scott Piper and
Robert Wolford and Thomas Brey and Robert Kantner and
Allen Ng and James Norris and Abdoulaye Traore and
Michael Frissora",
title = "Agile, efficient virtualization power management with
low-latency server power states",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "96--107",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485931",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISCA '13 conference proceedings.",
abstract = "One of the main driving forces of the growing adoption
of virtualization is its dramatic simplification of the
provisioning and dynamic management of IT resources. By
decoupling running entities from the underlying
physical resources, and by providing easy-to-use
controls to allocate, deallocate and migrate virtual
machines (VMs) across physical boundaries,
virtualization opens up new opportunities for improving
overall system resource use and power efficiency. While
a range of techniques for dynamic, distributed resource
management of virtualized systems have been proposed
and have seen their widespread adoption in enterprise
systems, similar techniques for dynamic power
management have seen limited acceptance. The main
barrier to dynamic, power-aware virtualization
management stems not from the limitations of
virtualization, but rather from the underlying physical
systems; and in particular, the high latency and energy
cost of power state change actions suited for
virtualization power management. In this work, we first
explore the feasibility of low-latency power states for
enterprise server systems and demonstrate, with real
prototypes, their quantitative energy-performance trade
offs compared to traditional server power states. Then,
we demonstrate an end-to-end power-aware virtualization
management solution leveraging these states, and
evaluate the dramatically-favorable power-performance
characteristics achievable with such systems. We
present, via both real system implementations and
scale-out simulations, that virtualization power
management with low-latency server power states can
achieve comparable overheads as base distributed
resource management in virtualized systems, and thus
can benefit from the same level of adoption, while
delivering close to energy-proportional power
efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tu:2013:SDS,
author = "Cheng-Chun Tu and Chao-tang Lee and Tzi-cker Chiueh",
title = "Secure {I/O} device sharing among virtual machines on
multiple hosts",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "108--119",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485932",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Virtualization allows flexible mappings between
physical resources and virtual entities, and improves
allocation efficiency and agility. Unfortunately, most
existing virtualization technologies are limited to
resources in a single host. This paper presents the
design, implementation and evaluation of a multi-host
I/O device virtualization system called Ladon, which
enables I/O devices to be shared among virtual machines
running on multiple hosts in a secure and efficient
way. Specifically, Ladon uses a PCIe network to connect
multiple servers with PCIe devices and allows VMs
running on these servers to directly interact with
these PCIe devices without interfering with one
another. Through an evaluation of a fully operational
Ladon prototype, we show that there is no throughput
and latency penalty of the multi-host I/O
virtualization enabled by Ladon compared to those of
the existing single-host I/O virtualization
technology.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chang:2013:IVP,
author = "Xiaotao Chang and Hubertus Franke and Yi Ge and Tao
Liu and Kun Wang and Jimi Xenidis and Fei Chen and Yu
Zhang",
title = "Improving virtualization in the presence of software
managed translation lookaside buffers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "120--129",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485933",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Virtualization has become an important technology that
is used across many platforms, particularly servers, to
increase utilization, multi-tenancy and security.
Virtualization introduces additional overhead that
often relates to memory management, interrupt handling
and hypervisor mode switching. Among those, memory
management and translation lookaside buffer (TLB)
management have been shown to have a significant impact
on the performance of systems. Two principal mechanisms
for TLB management exist in today's systems, namely
software and hardware managed TLBs. In this paper, we
analyze and quantify the overhead of a pure software
virtualization that is implemented over a software
managed TLB. We then describe our design of hardware
extensions to support virtualization in systems with
software managed TLBs to remove the most dominant
overheads. These extensions were implemented in the
Power embedded A2 core, which is used in the PowerEN
and in the Blue Gene/Q processors. They were used to
implement a KVM port. We evaluate each of these
hardware extensions to determine their overall
contributions to performance and efficiency.
Collectively these extensions demonstrate an average
improvement of 232\% over a pure software
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kim:2013:MME,
author = "Ji Kim and Christopher Torng and Shreesha Srinath and
Derek Lockhart and Christopher Batten",
title = "Microarchitectural mechanisms to exploit value
structure in {SIMT} architectures",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "130--141",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485934",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "SIMT architectures improve performance and efficiency
by exploiting control and memory-access structure
across data-parallel threads. Value structure occurs
when multiple threads operate on values that can be
compactly encoded, e.g., by using a simple function of
the thread index. We characterize the availability of
control, memory-access, and value structure in typical
kernels and observe ample amounts of value structure
that is largely ignored by current SIMT architectures.
We propose three microarchitectural mechanisms to
exploit value structure based on compact affine
execution of arithmetic, branch, and memory
instructions. We explore these mechanisms within the
context of traditional SIMT microarchitectures
(GP-SIMT), found in general-purpose graphics processing
units, as well as fine-grain SIMT microarchitectures
(FG-SIMT), a SIMT variant appropriate for
compute-focused data-parallel accelerators. Cycle-level
modeling of a modern GP-SIMT system and a VLSI
implementation of an eight-lane FG-SIMT execution
engine are used to evaluate a range of application
kernels. When compared to a baseline without compact
affine execution, our approach can improve GP-SIMT
cycle-level performance by 4--17\% and can improve
FG-SIMT absolute performance by 20--65\% and energy
efficiency up to 30\% for a majority of the kernels.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parashar:2013:TIC,
author = "Angshuman Parashar and Michael Pellauer and Michael
Adler and Bushra Ahsan and Neal Crago and Daniel Lustig
and Vladimir Pavlov and Antonia Zhai and Mohit Gambhir
and Aamer Jaleel and Randy Allmon and Rachid Rayess and
Stephen Maresh and Joel Emer",
title = "Triggered instructions: a control paradigm for
spatially-programmed architectures",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "142--153",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485935",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "In this paper, we present triggered instructions, a
novel control paradigm for arrays of processing
elements (PEs) aimed at exploiting spatial parallelism.
Triggered instructions completely eliminate the program
counter and allow programs to transition concisely
between states without explicit branch instructions.
They also allow efficient reactivity to inter-PE
communication traffic. The approach provides a unified
mechanism to avoid over-serialized execution,
essentially achieving the effect of techniques such as
dynamic instruction reordering and multithreading,
which each require distinct hardware mechanisms in a
traditional sequential architecture. Our analysis shows
that a triggered-instruction based spatial accelerator
can achieve 8X greater area-normalized performance than
a traditional general-purpose processor. Further
analysis shows that triggered control reduces the
number of static and dynamic instructions in the
critical paths by 62\% and 64\% respectively over a
program-counter style spatial baseline, resulting in a
speedup of 2.0X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Joao:2013:UBA,
author = "Jos{\'e} A. Joao and M. Aater Suleman and Onur Mutlu
and Yale N. Patt",
title = "Utility-based acceleration of multithreaded
applications on asymmetric {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "154--165",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485936",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Asymmetric Chip Multiprocessors (ACMPs) are becoming a
reality. ACMPs can speed up parallel applications if
they can identify and accelerate code segments that are
critical for performance. Proposals already exist for
using coarse-grained thread scheduling and fine-grained
bottleneck acceleration. Unfortunately, there have been
no proposals offered thus far to decide which code
segments to accelerate in cases where both
coarse-grained thread scheduling and fine-grained
bottleneck acceleration could have value. This paper
proposes Utility-Based Acceleration of Multithreaded
Applications on Asymmetric CMPs (UBA), a cooperative
software/hardware mechanism for identifying and
accelerating the most likely critical code segments
from a set of multithreaded applications running on an
ACMP. The key idea is a new Utility of Acceleration
metric that quantifies the performance benefit of
accelerating a bottleneck or a thread by taking into
account both the criticality and the expected speedup.
UBA outperforms the best of two state-of-the-art
mechanisms by 11\% for single application workloads and
by 7\% for two-application workloads on an ACMP with 52
small cores and 3 large cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kudrow:2013:QRC,
author = "Daniel Kudrow and Kenneth Bier and Zhaoxia Deng and
Diana Franklin and Yu Tomita and Kenneth R. Brown and
Frederic T. Chong",
title = "Quantum rotations: a case study in static and dynamic
machine-code generation for quantum computers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "166--176",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485937",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Work in quantum computer architecture has focused on
communication, layout and fault tolerance, largely
driven by Shor's factorization algorithm. For the first
time, we study a larger range of benchmarks and find
that another critical issue is the generation of code
sequences for quantum rotation operations.
Specifically, quantum algorithms require arbitrary
rotation angles, while quantum technologies and error
correction codes provide only for discrete angles and
operators. A sequence of quantum machine instructions
must be generated to approximate the arbitrary rotation
to the required precision. While previous work has
focused exclusively on static compilation, we find that
some applications require dynamic code generation and
explore the advantages and disadvantages of static and
dynamic approaches. We find that static code generation
can, in some cases, lead to a terabyte of machine code
to support required rotations. We also find that some
rotation angles are unknown until run time, requiring
dynamic code generation. Dynamic code generation,
however, exhibits significant trade-offs in terms of
time overhead versus code size. Furthermore, dynamic
code generation will be performed on classical
(non-quantum) computing resources, which may or may not
have a clock speed advantage over the target quantum
technology. For example, operations on trapped ions run
at kilohertz speeds, but superconducting qubits run at
gigahertz speeds. We introduce a new method for
compiling arbitrary rotations dynamically, designed to
minimize compilation time. The new method reduces
compilation time by up to five orders of magnitude
while increasing code size by one order of magnitude.
We explore the design space formed by these trade-offs
of dynamic versus static code generation, code quality,
and quantum technology. We introduce several techniques
to provide smoother trade-offs for dynamic code
generation and evaluate the viability of options in the
design space.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Muscat:2013:DBM,
author = "Richard A. Muscat and Karin Strauss and Luis Ceze and
Georg Seelig",
title = "{DNA}-based molecular architecture with spatially
localized components",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "177--188",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485938",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Performing computation inside living cells offers
life-changing applications, from improved medical
diagnostics to better cancer therapy to intelligent
drugs. Due to its bio-compatibility and ease of
engineering, one promising approach for performing
in-vivo computation is DNA strand displacement. This
paper introduces computer architects to DNA strand
displacement ``circuits'', discusses associated
architectural challenges, and proposes a new
organization that provides practical composability. In
particular, prior approaches rely mostly on stochastic
interaction of freely diffusing components. This paper
proposes practical spatial isolation of components,
leading to more easily designed DNA-based circuits. DNA
nanotechnology is currently at a turning point, with
many proposed applications being realized [20, 9]. We
believe that it is time for the computer architecture
community to take notice and contribute.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Guo:2013:ADA,
author = "Qing Guo and Xiaochen Guo and Ravi Patel and Engin
Ipek and Eby G. Friedman",
title = "{AC-DIMM}: associative computing with {STT-MRAM}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "189--200",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485939",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "With technology scaling, on-chip power dissipation and
off-chip memory bandwidth have become significant
performance bottlenecks in virtually all computer
systems, from mobile devices to supercomputers. An
effective way of improving performance in the face of
bandwidth and power limitations is to rely on
associative memory systems. Recent work on a PCM-based,
associative TCAM accelerator shows that associative
search capability can reduce both off-chip bandwidth
demand and overall system energy. Unfortunately,
previously proposed resistive TCAM accelerators have
limited flexibility: only a restricted (albeit
important) class of applications can benefit from a
TCAM accelerator, and the implementation is confined to
resistive memory technologies with a high dynamic range
({R$_{High}$}/{R$_{Low}$}), such as PCM. This work
proposes AC-DIMM, a flexible, high-performance
associative compute engine built on a DDR3-compatible
memory module. AC-DIMM addresses the limited
flexibility of previous resistive TCAM accelerators by
combining two powerful capabilities --- associative
search and processing in memory. Generality is improved
by augmenting a TCAM system with a set of integrated,
user programmable microcontrollers that operate
directly on search results, and by architecting the
system such that key-value pairs can be co-located in
the same TCAM row. A new, bit-serial TCAM array is
proposed, which enables the system to be implemented
using STT-MRAM. AC-DIMM achieves a 4.2X speedup and a
6.5X energy reduction over a conventional RAM-based
system on a set of 13 evaluated applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Hechtman:2013:EMC,
author = "Blake A. Hechtman and Daniel J. Sorin",
title = "Exploring memory consistency for massively-threaded
throughput-oriented processors",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "201--212",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485940",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "We re-visit the issue of hardware consistency models
in the new context of massively-threaded
throughput-oriented processors (MTTOPs). A prominent
example of an MTTOP is a GPGPU, but other examples
include Intel's MIC architecture and some recent
academic designs. MTTOPs differ from CPUs in many
significant ways, including their ability to tolerate
latency, their memory system organization, and the
characteristics of the software they run. We compare
implementations of various hardware consistency models
for MTTOPs in terms of performance, energy-efficiency,
hardware complexity, and programmability. Our results
show that the choice of hardware consistency model has
a surprisingly minimal impact on performance and thus
the decision should be based on hardware complexity,
energy-efficiency, and programmability. For many
MTTOPs, it is likely that even a simple implementation
of sequential consistency is attractive.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Duan:2013:WTM,
author = "Yuelu Duan and Abdullah Muzahid and Josep Torrellas",
title = "{WeeFence}: toward making fences free in {TSO}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "213--224",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485941",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Although fences are designed for low-overhead
concurrency coordination, they can be expensive in
current machines. If fences were largely free, faster
fine-grained concurrent algorithms could be devised,
and compilers could guarantee Sequential Consistency
(SC) at little cost. In this paper, we present WeeFence
(or WFence for short), a fence that is very cheap
because it allows post-fence accesses to skip it. Such
accesses can typically complete and retire before the
pre-fence writes have drained from the write buffer.
Only when an incorrect reordering of accesses is about
to happen, does the hardware stall to prevent it. In
the paper, we present the WFence design for TSO, and
compare it to a conventional fence with speculation for
8-processor multicore simulations. We run parallel
kernels that contain explicit fences and parallel
applications that do not. For the kernels, WFence
eliminates nearly all of the fence stall, reducing the
kernels' execution time by an average of 11\%. For the
applications, a conservative compiler algorithm places
fences in the code to guarantee SC. In this case, on
average, WFences reduce the resulting fence overhead
from 38\% of the applications' execution time to 2\%
(in a centralized WFence design), or from 36\% to 5\%
(in a distributed WFence design).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cain:2013:RAS,
author = "Harold W. Cain and Maged M. Michael and Brad Frey and
Cathy May and Derek Williams and Hung Le",
title = "Robust architectural support for transactional memory
in the {Power} architecture",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "225--236",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485942",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "On the twentieth anniversary of the original
publication [10], following ten years of intense
activity in the research literature, hardware support
for transactional memory (TM) has finally become a
commercial reality, with HTM-enabled chips currently or
soon-to-be available from many hardware vendors. In
this paper we describe architectural support for TM
added to a future version of the Power ISA{\TM}. Two
imperatives drove the development: the desire to
complement our weakly-consistent memory model with a
more friendly interface to simplify the development and
porting of multithreaded applications, and the need for
robustness beyond that of some early implementations.
In the process of commercializing the feature, we had
to resolve some previously unexplored interactions
between TM and existing features of the ISA, for
example translation shootdown, interrupt handling,
atomic read-modify-write primitives, and our weakly
consistent memory model. We describe these
interactions, the overall architecture, and discuss the
motivation and rationale for our choices of
architectural semantics, beyond what is typically found
in reference manuals.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Basu:2013:EVM,
author = "Arkaprava Basu and Jayneel Gandhi and Jichuan Chang
and Mark D. Hill and Michael M. Swift",
title = "Efficient virtual memory for big memory servers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "237--248",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485943",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Our analysis shows that many ``big-memory'' server
workloads, such as databases, in-memory caches, and
graph analytics, pay a high cost for page-based virtual
memory. They consume as much as 10\% of execution
cycles on TLB misses, even using large pages. On the
other hand, we find that these workloads use read-write
permission on most pages, are provisioned not to swap,
and rarely benefit from the full flexibility of
page-based virtual memory. To remove the TLB miss
overhead for big-memory workloads, we propose mapping
part of a process's linear virtual address space with a
direct segment, while page mapping the rest of the
virtual address space. Direct segments use minimal
hardware --- base, limit and offset registers per core
--- to map contiguous virtual memory regions directly
to contiguous physical memory. They eliminate the
possibility of TLB misses for key data structures such
as database buffer pools and in-memory key-value
stores. Memory mapped by a direct segment may be
converted back to paging when needed. We prototype
direct-segment software support for x86-64 in Linux and
emulate direct-segment hardware. For our workloads,
direct segments eliminate almost all TLB misses and
reduce the execution time wasted on TLB misses to less
than 0.5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wu:2013:NBD,
author = "Lisa Wu and Raymond J. Barker and Martha A. Kim and
Kenneth A. Ross",
title = "Navigating big data with high-throughput,
energy-efficient data partitioning",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "249--260",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485944",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "The global pool of data is growing at 2.5 quintillion
bytes per day, with 90\% of it produced in the last two
years alone [24]. There is no doubt the era of big data
has arrived. This paper explores targeted deployment of
hardware accelerators to improve the throughput and
energy efficiency of large-scale data processing. In
particular, data partitioning is a critical operation
for manipulating large data sets. It is often the
limiting factor in database performance and represents
a significant fraction of the overall runtime of large
data queries. To accelerate partitioning, this paper
describes a hardware accelerator for range
partitioning, or HARP, and a hardware-software data
streaming framework. The streaming framework offers a
seamless execution environment for streaming
accelerators such as HARP. Together, HARP and the
streaming framework provide an order of magnitude
improvement in partitioning performance and energy. A
detailed analysis of a 32 nm physical design shows 7.8
times the throughput of a highly optimized and
optimistic software implementation, while consuming
just 6.9\% of the area and 4.3\% of the power of a
single Xeon core in the same technology generation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chung:2013:LBD,
author = "Eric S. Chung and John D. Davis and Jaewon Lee",
title = "{LINQits}: big data on little clients",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "261--272",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485945",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "We present LINQits, a flexible hardware template that
can be mapped onto programmable logic or ASICs in a
heterogeneous system-on-chip for a mobile device or
server. Unlike fixed-function accelerators, LINQits
accelerates a domain-specific query language called
LINQ. LINQits does not provide coverage for all
possible applications --- however, existing
applications (re-)written with LINQ in mind benefit
extensively from hardware acceleration. Furthermore,
the LINQits framework offers a graceful and transparent
migration path from software to hardware. LINQits is
prototyped on a 2W heterogeneous SoC called the ZYNQ
processor, which combines dual ARM A9 processors with
an FPGA on a single die in 28nm silicon technology. Our
physical measurements show that LINQits improves energy
efficiency by 8.9 to 30.6 times and performance by 10.7
to 38.1 times compared to optimized, multithreaded C
programs running on conventional ARM A9 processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Atta:2013:SBI,
author = "Islam Atta and Pinar T{\"o}z{\"u}n and Xin Tong and
Anastasia Ailamaki and Andreas Moshovos",
title = "{STREX}: boosting instruction cache reuse in {OLTP}
workloads through stratified transaction execution",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "273--284",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485946",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Online transaction processing (OLTP) workload
performance suffers from instruction stalls; the
instruction footprint of a typical transaction exceeds
by far the capacity of an L1 cache, leading to ongoing
cache thrashing. Several proposed techniques remove
some instruction stalls in exchange for error-prone
instrumentation to the code base, or a sharp increase
in the L1-I cache unit area and power. Others reduce
instruction miss latency by better utilizing a shared
L2 cache. SLICC [2], a recently proposed thread
migration technique that exploits transaction
instruction locality, is promising for high core counts
but performs sub-optimally or may hurt performance when
running on few cores. This paper corroborates that OLTP
transactions exhibit significant intra- and
inter-thread overlap in their instruction footprint,
and analyzes the instruction stall reduction benefits.
This paper presents STREX, a hardware,
programmer-transparent technique that exploits typical
transaction behavior to improve instruction reuse in
first level caches. STREX time-multiplexes the
execution of similar transactions dynamically on a
single core so that instructions fetched by one
transaction are reused by all other transactions
executing in the system as much as possible. STREX
dynamically slices the execution of each transaction
into cache-sized segments simply by observing when
blocks are brought in the cache and when they are
evicted. Experiments show that, when compared to
baseline execution on 2--16 cores, STREX consistently
improves performance while reducing the number of L1
instruction and data misses by 37\% and 14\% on
average, respectively. Finally, this paper proposes a
practical hybrid technique that combines STREX and
SLICC, thereby guaranteeing performance benefits
regardless of the number of available cores and the
workload's footprint.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Paul:2013:CBN,
author = "Indrani Paul and Srilatha Manne and Manish Arora and
W. Lloyd Bircher and Sudhakar Yalamanchili",
title = "Cooperative boosting: needy versus greedy power
management",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "285--296",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485947",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "This paper examines the interaction between thermal
management techniques and power boosting in a
state-of-the-art heterogeneous processor consisting of
a set of CPU and GPU cores. We show that for classes of
applications that utilize both the CPU and the GPU,
modern boost algorithms that greedily seek to convert
thermal headroom into performance can interact with
thermal coupling effects between the CPU and the GPU to
degrade performance. We first examine the causes of
this behavior and explain the interaction between
thermal coupling, performance coupling, and workload
behavior. Then we propose a dynamic power-management
approach called cooperative boosting (CB) to allocate
power dynamically between CPU and GPU in a manner that
balances thermal coupling against the needs of
performance coupling to optimize performance under a
given thermal constraint. Through real hardware-based
measurements, we evaluate CB against a
state-of-the-practice boost algorithm and show that
overall application performance and power savings
increase by 10\% and 8\% (up to 52\% and 34\%),
respectively, resulting in average energy efficiency
improvement of 25\% (up to 76\%) over a wide range of
benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Bacha:2013:DRV,
author = "Anys Bacha and Radu Teodorescu",
title = "Dynamic reduction of voltage margins by leveraging
on-chip {ECC} in {Itanium II} processors",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "297--307",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485948",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Lowering supply voltage is one of the most effective
approaches for improving the energy efficiency of
microprocessors. Unfortunately, technology limitations,
such as process variability and circuit aging, are
forcing microprocessor designers to add larger voltage
guardbands to their chips. This makes supply voltage
increasingly difficult to scale with technology. This
paper presents a new mechanism for dynamically reducing
voltage margins while maintaining the chip operating
frequency constant. Unlike previous approaches that
rely on special hardware to detect and recover from
timing violations caused by low-voltage execution, our
solution is firmware-based and does not require
additional hardware. Instead, it relies on error
correction mechanisms already built into modern
processors. The system dynamically reduces voltage
margins and uses correctable error reports raised by
the hardware to identify the lowest, safe operating
voltage. The solution adapts to core-to-core
variability by tailoring supply voltage to each core's
safe operating level. In addition, it exploits
variability in workload vulnerability to low voltage
execution. The system was prototyped on an HP Integrity
Server that uses Intel's Itanium 9560 processors.
Evaluation using SPECjbb2005 and SPEC CPU2000 workloads
shows core power savings ranging from 18\% to 23\%,
with minimal performance impact.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Cook:2013:HEC,
author = "Henry Cook and Miquel Moreto and Sarah Bird and Khanh
Dao and David A. Patterson and Krste Asanovic",
title = "A hardware evaluation of cache partitioning to improve
utilization and energy-efficiency while preserving
responsiveness",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "308--319",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485949",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Computing workloads often contain a mix of
interactive, latency-sensitive foreground applications
and recurring background computations. To guarantee
responsiveness, interactive and batch applications are
often run on disjoint sets of resources, but this
incurs additional energy, power, and capital costs. In
this paper, we evaluate the potential of hardware cache
partitioning mechanisms and policies to improve
efficiency by allowing background applications to run
simultaneously with interactive foreground
applications, while avoiding degradation in interactive
responsiveness. We evaluate these tradeoffs using
commercial x86 multicore hardware that supports cache
partitioning, and find that real hardware measurements
with full applications provide different observations
than past simulation-based evaluations. Co-scheduling
applications without LLC partitioning leads to a 10\%
energy improvement and average throughput improvement
of 54\% compared to running tasks separately, but can
result in foreground performance degradation of up to
34\% with an average of 6\%. With optimal static LLC
partitioning, the average energy improvement increases
to 12\% and the average throughput improvement to 60\%,
while the worst case slowdown is reduced noticeably to
7\% with an average slowdown of only 2\%. We also
evaluate a practical low-overhead dynamic algorithm to
control partition sizes, and are able to realize the
potential performance guarantees of the optimal static
approach, while increasing background throughput by an
additional 19\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Das:2013:CEP,
author = "Reetuparna Das and Satish Narayanasamy and Sudhir K.
Satpathy and Ronald G. Dreslinski",
title = "{Catnap}: energy proportional multiple
network-on-chip",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "320--331",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485950",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Multiple networks have been used in several processor
implementations to scale bandwidth and ensure
protocol-level deadlock freedom for different message
classes. In this paper, we observe that a
multiple-network design is also attractive from a power
perspective and can be leveraged to achieve energy
proportionality by effective power gating. Unlike a
single-network design, a multiple-network design is
more amenable to power gating, as its subnetworks
(subnets) can be power gated without compromising the
connectivity of the network. To exploit this
opportunity, we propose the Catnap architecture which
consists of synergistic subnet selection and
power-gating policies. Catnap maximizes the number of
consecutive idle cycles in a router, while avoiding
performance loss due to overloading a subnet. We
evaluate a 256-core processor with a concentrated mesh
topology using synthetic traffic and 35 applications.
We show that the average network power of a
power-gating optimized multiple-network design with
four subnets could be 44\% lower than a bandwidth
equivalent single-network design for an average
performance cost of about 5\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jog:2013:OSP,
author = "Adwait Jog and Onur Kayiran and Asit K. Mishra and
Mahmut T. Kandemir and Onur Mutlu and Ravishankar Iyer
and Chita R. Das",
title = "Orchestrated scheduling and prefetching for {GPGPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "332--343",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485951",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "In this paper, we present techniques that coordinate
the thread scheduling and prefetching decisions in a
General Purpose Graphics Processing Unit (GPGPU)
architecture to better tolerate long memory latencies.
We demonstrate that existing warp scheduling policies
in GPGPU architectures are unable to effectively
incorporate data prefetching. The main reason is that
they schedule consecutive warps, which are likely to
access nearby cache blocks and thus prefetch accurately
for one another, back-to-back in consecutive cycles.
This either (1) causes prefetches to be generated by a
warp too close to the time their corresponding
addresses are actually demanded by another warp, or (2)
requires sophisticated prefetcher designs to correctly
predict the addresses required by a future
``far-ahead'' warp while executing the current warp. We
propose a new prefetch-aware warp scheduling policy
that overcomes these problems. The key idea is to
separate in time the scheduling of consecutive warps
such that they are not executed back-to-back. We show
that this policy not only enables a simple prefetcher
to be effective in tolerating memory latencies but also
improves memory bank parallelism, even when prefetching
is not employed. Experimental evaluations across a
diverse set of applications on a 30-core simulated
GPGPU platform demonstrate that the prefetch-aware warp
scheduler provides 25\% and 7\% average performance
improvement over baselines that employ prefetching in
conjunction with, respectively, the commonly-employed
round-robin scheduler or the recently-proposed
two-level warp scheduler. Moreover, when prefetching is
not employed, the prefetch-aware warp scheduler
provides higher performance than both of these baseline
schedulers as it better exploits memory bank
parallelism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jing:2013:EES,
author = "Naifeng Jing and Yao Shen and Yao Lu and Shrikanth
Ganapathy and Zhigang Mao and Minyi Guo and Ramon Canal
and Xiaoyao Liang",
title = "An energy-efficient and scalable {eDRAM}-based
register file architecture for {GPGPU}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "344--355",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485952",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "The heavily-threaded data processing demands of
streaming multiprocessors (SM) in a GPGPU require a
large register file (RF). The fast increasing size of
the RF makes the area cost and power consumption
unaffordable for traditional SRAM designs in the future
technologies. In this paper, we propose to use
embedded-DRAM (eDRAM) as an alternative in future
GPGPUs. Compared with SRAM, eDRAM provides higher
density and lower leakage power. However, the limited
data retention time in eDRAM poses new challenges.
Periodic refresh operations are needed to maintain data
integrity. This is exacerbated with the scaling of
eDRAM density, process variations and temperature.
Unlike conventional CPUs which make use of multi-ported
RF, most of the RFs in modern GPGPU are heavily banked
but not multi-ported to reduce the hardware cost. This
provides a unique opportunity to hide the refresh
overhead. We propose two different eDRAM
implementations based on 3T1D and 1T1C memory cells. To
mitigate the impact of periodic refresh, we propose two
novel refresh solutions using bank bubble and bank
walk-through. Plus, for the 1T1C RF, we design an
interleaved bank organization together with an
intelligent warp scheduling strategy to reduce the
impact of the destructive reads. The analysis shows
that our schemes present better energy efficiency,
scalability and variation tolerance than traditional
SRAM-based designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Rhu:2013:MSR,
author = "Minsoo Rhu and Mattan Erez",
title = "Maximizing {SIMD} resource utilization in {GPGPUs}
with {SIMD} lane permutation",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "356--367",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485953",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Current GPUs maintain high programmability by
abstracting the SIMD nature of the hardware as
independent concurrent threads of control with hardware
responsible for generating predicate masks to utilize
the SIMD hardware for different flows of control. This
dynamic masking leads to poor utilization of SIMD
resources when the control of different threads in the
same SIMD group diverges. Prior research suggests that
SIMD groups be formed dynamically by compacting a large
number of threads into groups, mitigating the impact of
divergence. To maintain hardware efficiency, however,
the alignment of a thread to a SIMD lane is fixed,
limiting the potential for compaction. We observe that
control frequently diverges in a manner that prevents
compaction because of the way in which the fixed
alignment of threads to lanes is done. This paper
presents an in-depth analysis on the causes for
ineffective compaction. An important observation is
that in many cases, control diverges because of
programmatic branches, which do not depend on input
data. This behavior, when combined with the default
mapping of threads to lanes, severely restricts
compaction. We then propose SIMD lane permutation (SLP)
as an optimization to expand the applicability of
compaction in such cases of lane alignment. SLP seeks
to rearrange how threads are mapped to lanes to allow
even programmatic branches to be compacted effectively,
improving SIMD utilization up to 34\% accompanied by a
maximum 25\% performance boost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Vaidya:2013:SDO,
author = "Aniruddha S. Vaidya and Anahita Shayesteh and Dong
Hyuk Woo and Roy Saharoy and Mani Azimi",
title = "{SIMD} divergence optimization through intra-warp
compaction",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "368--379",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485954",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "SIMD execution units in GPUs are increasingly used for
high performance and energy efficient acceleration of
general purpose applications. However, SIMD control
flow divergence effects can result in reduced execution
efficiency in a class of GPGPU applications, classified
as divergent applications. Improving SIMD efficiency,
therefore, has the potential to bring significant
performance and energy benefits to a wide range of such
data parallel applications. Recently, the SIMD
divergence problem has received increased attention,
and several micro-architectural techniques have been
proposed to address various aspects of this problem.
However, these techniques are often quite complex and,
therefore, unlikely candidates for practical
implementation. In this paper, we propose two
micro-architectural optimizations for GPGPU
architectures, which utilize relatively simple
execution cycle compression techniques when certain
groups of turned-off lanes exist in the instruction
stream. We refer to these optimizations as basic cycle
compression (BCC) and swizzled-cycle compression (SCC),
respectively. In this paper, we will outline the
additional requirements for implementing these
optimizations in the context of the studied GPGPU
architecture. Our evaluations with divergent SIMD
workloads from OpenCL (GPGPU) and OpenGL (graphics)
applications show that BCC and SCC reduce execution
cycles in divergent applications by as much as 42\%
(20\% on average). For a subset of divergent workloads,
the execution time is reduced by an average of 7\% for
today's GPUs or by 18\% for future GPUs with a better
provisioned memory subsystem. The key contribution of
our work is in simplifying the micro-architecture for
delivering divergence optimizations while providing the
bulk of the benefits of more complex approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Son:2013:RMA,
author = "Young Hoon Son and Seongil O and Yuhwan Ro and Jae W.
Lee and Jung Ho Ahn",
title = "Reducing memory access latency with asymmetric {DRAM}
bank organizations",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "380--391",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485955",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "DRAM has been a de facto standard for main memory, and
advances in process technology have led to a rapid
increase in its capacity and bandwidth. In contrast,
its random access latency has remained relatively
stagnant, as it is still around 100 CPU clock cycles.
Modern computer systems rely on caches or other latency
tolerance techniques to lower the average access
latency. However, not all applications have ample
parallelism or locality that would help hide or reduce
the latency. Moreover, applications' demands for memory
space continue to grow, while the capacity gap between
last-level caches and main memory is unlikely to
shrink. Consequently, reducing the main-memory latency
is important for application performance.
Unfortunately, previous proposals have not adequately
addressed this problem, as they have focused only on
improving the bandwidth and capacity or reduced the
latency at the cost of significant area overhead. We
propose asymmetric DRAM bank organizations to reduce
the average main-memory access latency. We first
analyze the access and cycle times of a modern DRAM
device to identify key delay components for latency
reduction. Then we reorganize a subset of DRAM banks to
reduce their access and cycle times by half with low
area overhead. By synergistically combining these
reorganized DRAM banks with support for non-uniform
bank accesses, we introduce a novel DRAM bank
organization with center high-aspect-ratio mats called
CHARM. Experiments on a simulated chip-multiprocessor
system show that CHARM improves both the instructions
per cycle and system-wide energy-delay product up to
21\% and 32\%, respectively, with only a 3\% increase
in die area.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Liu:2013:CTP,
author = "Ziyi Liu and JongHyuk Lee and Junyuan Zeng and
Yuanfeng Wen and Zhiqiang Lin and Weidong Shi",
title = "{CPU} transparent protection of {OS} kernel and
hypervisor integrity with programmable {DRAM}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "392--403",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485956",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Increasingly, cyber attacks (e.g., kernel rootkits)
target the inner rings of a computer system, and they
have seriously undermined the integrity of the entire
computer systems. To eliminate these threats, it is
imperative to develop innovative solutions running
below the attack surface. This paper presents MGuard, a
new most inner ring solution for inspecting the system
integrity that is directly integrated with the DRAM
DIMM devices. More specifically, we design a
programmable guard that is integrated with the advanced
memory buffer of FB-DIMM to continuously monitor all
the memory traffic and detect the system integrity
violations. Unlike the existing approaches that are
either snapshot-based or lack compatibility and
flexibility, MGuard continuously monitors the integrity
of all the outer rings including both OS kernel and
hypervisor of interest, with a greater extendibility
enabled by a programmable interface. It offers a
hardware drop-in solution transparent to the host CPU
and memory controller. Moreover, MGuard is isolated
from the host software and hardware, leading to strong
security for remote attackers. Our simulation-based
experimental results show that MGuard introduces no
speed overhead, and is able to detect nearly all the
OS-kernel and hypervisor control data related rootkits
we tested.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Jevdjic:2013:SDC,
author = "Djordje Jevdjic and Stavros Volos and Babak Falsafi",
title = "Die-stacked {DRAM} caches for servers: hit ratio,
latency, or bandwidth? {Have} it all with footprint
cache",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "404--415",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485957",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Recent research advocates using large die-stacked DRAM
caches to break the memory bandwidth wall. Existing
DRAM cache designs fall into one of two categories ---
block-based and page-based. The former organize data in
conventional blocks (e.g., 64B), ensuring low off-chip
bandwidth utilization, but co-locate tags and data in
the stacked DRAM, incurring high lookup latency.
Furthermore, such designs suffer from low hit ratios
due to poor temporal locality. In contrast, page-based
caches, which manage data at larger granularity (e.g.,
4KB pages), allow for reduced tag array overhead and
fast lookup, and leverage high spatial locality at the
cost of moving large amounts of data on and off the
chip. This paper introduces Footprint Cache, an
efficient die-stacked DRAM cache design for server
processors. Footprint Cache allocates data at the
granularity of pages, but identifies and fetches only
those blocks within a page that will be touched during
the page's residency in the cache --- i.e., the page's
footprint. In doing so, Footprint Cache eliminates the
excessive off-chip traffic associated with page-based
designs, while preserving their high hit ratio, small
tag array overhead, and low lookup latency.
Cycle-accurate simulation results of a 16-core server
with up to 512MB Footprint Cache indicate a 57\%
performance improvement over a baseline chip without a
die-stacked cache. Compared to a state-of-the-art
block-based design, our design improves performance by
13\% while reducing dynamic energy of stacked DRAM by
24\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sim:2013:RSD,
author = "Jaewoong Sim and Gabriel H. Loh and Vilas Sridharan
and Mike O'Connor",
title = "Resilient die-stacked {DRAM} caches",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "416--427",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485958",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Die-stacked DRAM can provide large amounts of
in-package, high-bandwidth cache storage. For server
and high-performance computing markets, however, such
DRAM caches must also provide sufficient support for
reliability and fault tolerance. While conventional
off-chip memory provides ECC support by adding one or
more extra chips, this may not be practical in a 3D
stack. In this paper, we present a DRAM cache
organization that uses error-correcting codes (ECCs),
strong checksums (CRCs), and dirty data duplication to
detect and correct a wide range of stacked DRAM
failures, from traditional bit errors to large-scale
row, column, bank, and channel failures. With only a
modest performance degradation compared to a DRAM cache
with no ECC support, our proposal can correct all
single-bit failures, and 99.9993\% of all row, column,
and bank failures, providing more than a 54,000x
improvement in the FIT rate of silent-data corruptions
compared to basic SECDED ECC protection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Du:2013:BMB,
author = "Yu Du and Miao Zhou and Bruce R. Childers and Daniel
Moss{\'e} and Rami Melhem",
title = "Bit mapping for balanced {PCM} cell programming",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "428--439",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485959",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Write bandwidth is an inherent performance bottleneck
for Phase Change Memory (PCM) for two reasons. First,
PCM cells have long programming time, and second, only
a limited number of PCM cells can be programmed
concurrently due to programming current and write
circuit constraints. For each PCM write, the data bits
of the write request are typically mapped to multiple
cell groups and processed in parallel. We observed that
an unbalanced distribution of modified data bits among
cell groups significantly increases PCM write time and
hurts effective write bandwidth. To address this issue,
we first uncover the cyclical and cluster patterns for
modified data bits. Next, we propose double XOR mapping
(D-XOR) to distribute modified data bits among cell
groups in a balanced way. D-XOR can reduce PCM write
service time by 45\% on average, which increases PCM
write throughput by 1.8x. As error correction
(redundant bits) is critical for PCM, we also consider
the impact of redundancy information in mapping data
and error correction bits to cell groups. Our
techniques lead to a 51\% average reduction in write
service time for a PCM main memory with ECC, which
increases IPC by 12\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Seong:2013:TLC,
author = "Nak Hee Seong and Sungkap Yeo and Hsien-Hsin S. Lee",
title = "Tri-level-cell phase change memory: toward an
efficient and reliable memory system",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "440--451",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485960",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "There are several emerging memory technologies looming
on the horizon to compensate the physical scaling
challenges of DRAM. Phase change memory (PCM) is one
such candidate proposed for being part of the main
memory in computing systems. One salient feature of PCM
is its multi-level-cell (MLC) property, which can be
used to multiply the memory capacity at the cell level.
However, due to the nature of PCM that the value
written to the cell can drift over time, PCM is prone
to a unique type of soft errors, posing a great
challenge for their practical deployment. This paper
first quantitatively studied the current art for MLC
PCM in dealing with the resistance drift problem and
showed that the previously proposed techniques such as
scrubbing or error correction mechanisms have
significant reliability challenges to overcome. We then
propose tri-level-cell PCM and demonstrate its ability
to achieving 10$^5$ x lower soft error rate than
four-level-cell PCM and 1.33 x higher information
density than single-level-cell PCM. According to our
findings, the tri-level-cell PCM shows 36.4\%
performance improvement over the four-level-cell PCM
while achieving the soft error rate of DRAM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Azevedo:2013:ZME,
author = "Rodolfo Azevedo and John D. Davis and Karin Strauss
and Parikshit Gopalan and Mark Manasse and Sergey
Yekhanin",
title = "Zombie memory: extending memory lifetime by reviving
dead blocks",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "452--463",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485961",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Zombie is an endurance management framework that
enables a variety of error correction mechanisms to
extend the lifetimes of memories that suffer from bit
failures caused by wearout, such as phase-change memory
(PCM). Zombie supports both single-level cell (SLC) and
multi-level cell (MLC) variants. It extends the
lifetime of blocks in working memory pages (primary
blocks) by pairing them with spare blocks, i.e.,
working blocks in pages that have been disabled due to
exhaustion of a single block's error correction
resources, which would be `dead' otherwise. Spare
blocks adaptively provide error correction resources to
primary blocks as failures accumulate over time. This
reduces the waste caused by early block failures,
making working blocks in discarded pages a useful
resource. Even though we use PCM as the target
technology, Zombie applies to any memory technology
that suffers stuck-at cell failures. This paper
describes the Zombie framework, a combination of two
new error correction mechanisms (ZombieXOR for SLC and
ZombieMLC for MLC) and the extension of two previously
proposed SLC mechanisms (ZombieECP and ZombieERC). The
result is a 58\% to 92\% improvement in endurance for
Zombie SLC memory and an even more impressive 11x to
17x improvement for ZombieMLC, both with performance
overheads of only 0.1\% when memories using prior error
correction mechanisms reach end of life.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Caulfield:2013:QSA,
author = "Adrian M. Caulfield and Steven Swanson",
title = "{QuickSAN}: a storage area network for fast,
distributed, solid state disks",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "464--474",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485962",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Solid State Disks (SSDs) based on flash and other
non-volatile memory technologies reduce storage
latencies from 10s of milliseconds to 10s or 100s of
microseconds, transforming previously inconsequential
storage overheads into performance bottlenecks. This
problem is especially acute in storage area network
(SAN) environments where complex hardware and software
layers (distributed file systems, block servers, network
stacks, etc.) lie between applications and remote data.
These layers can add hundreds of microseconds to
requests, obscuring the performance of both flash
memory and faster, emerging non-volatile memory
technologies. We describe QuickSAN, a SAN prototype
that eliminates most software overheads and
significantly reduces hardware overheads in SANs.
QuickSAN integrates a network adapter into SSDs, so the
SSDs can communicate directly with one another to
service storage accesses as quickly as possible.
QuickSAN can also give applications direct access to
both local and remote data without operating system
intervention, further reducing software costs. Our
evaluation of QuickSAN demonstrates remote access
latencies of 20 $ \mu $ s for 4 KB requests, bandwidth
improvements of as much as 163x for small accesses
compared with an equivalent iSCSI implementation, and
2.3--3.0x application level speedup for distributed
sorting. We also show that QuickSAN improves energy
efficiency by up to 96\% and that QuickSAN's networking
connectivity allows for improved cluster-level energy
efficiency under varying load.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sanchez:2013:ZFA,
author = "Daniel Sanchez and Christos Kozyrakis",
title = "{ZSim}: fast and accurate microarchitectural
simulation of thousand-core systems",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "475--486",
month = jun,
year = "2013",
DOI = "https://doi.org/10.1145/2508148.2485963",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Architectural simulation is time-consuming, and the
trend towards hundreds of cores is making sequential
simulation even slower. Existing parallel simulation
techniques either scale poorly due to excessive
synchronization, or sacrifice accuracy by allowing
event reordering and using simplistic contention
models. As a result, most researchers use sequential
simulators and model small-scale systems with 16--32
cores. With 100-core chips already available,
developing simulators that scale to thousands of cores
is crucial. We present three novel techniques that,
together, make thousand-core simulation practical.
First, we speed up detailed core models (including OOO
cores) with instruction-driven timing models that
leverage dynamic binary translation. Second, we
introduce bound-weave, a two-phase parallelization
technique that scales parallel simulation on multicore
hosts efficiently with minimal loss of accuracy. Third,
we implement lightweight user-level virtualization to
support complex workloads, including multiprogrammed,
client-server, and managed-runtime applications,
without the need for full-system simulation,
sidestepping the lack of scalable OSs and ISAs that
support thousands of cores. We use these techniques to
build zsim, a fast, scalable, and accurate simulator.
On a 16-core host, zsim models a 1024-core chip at
speeds of up to 1,500 MIPS using simple cores and up to
300 MIPS using detailed OOO cores, 2--3 orders of
magnitude faster than existing parallel simulators.
Simulator performance scales well with both the number
of modeled cores and the number of host cores. We
validate zsim against a real Westmere system on a wide
variety of workloads, and find performance and
microarchitectural events to be within a narrow range
of the real system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Leng:2013:GEE,
author = "Jingwen Leng and Tayler Hetherington and Ahmed
ElTantawy and Syed Gilani and Nam Sung Kim and Tor M.
Aamodt and Vijay Janapa Reddi",
title = "{GPUWattch}: enabling energy optimizations in
{GPGPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "487--498",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485964",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "General-purpose GPUs (GPGPUs) are becoming prevalent
in mainstream computing, and performance per watt has
emerged as a more crucial evaluation metric than peak
performance. As such, GPU architects require robust
tools that will enable them to quickly explore new ways
to optimize GPGPUs for energy efficiency. We propose a
new GPGPU power model that is configurable, capable of
cycle-level calculations, and carefully validated
against real hardware measurements. To achieve
configurability, we use a bottom-up methodology and
abstract parameters from the microarchitectural
components as the model's inputs. We developed a
rigorous suite of 80 microbenchmarks that we use to
bound any modeling uncertainties and inaccuracies. The
power model is comprehensively validated against
measurements of two commercially available GPUs, and
the measured error is within 9.9\% and 13.4\% for the
two target GPUs (GTX 480 and Quadro FX5600). The model
also accurately tracks the power consumption trend over
time. We integrated the power model with the
cycle-level simulator GPGPU-Sim and demonstrate the
energy savings by utilizing dynamic voltage and
frequency scaling (DVFS) and clock gating. Traditional
DVFS reduces GPU energy consumption by 14.4\% by
leveraging within-kernel runtime variations. More
finer-grained SM cluster-level DVFS improves the energy
savings from 6.6\% to 13.6\% for those benchmarks that
show clustered execution behavior. We also show that
clock gating inactive lanes during divergence reduces
dynamic power by 11.2\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wu:2013:SMP,
author = "Meng-Ju Wu and Minshu Zhao and Donald Yeung",
title = "Studying multicore processor scaling via reuse
distance analysis",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "499--510",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485965",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "The trend for multicore processors is towards
increasing numbers of cores, with 100s of cores --- i.e.
large-scale chip multiprocessors (LCMPs) --- possible
in the future. The key to realizing the potential of
LCMPs is the cache hierarchy, so studying how memory
performance will scale is crucial. Reuse distance (RD)
analysis can help architects do this. In particular,
recent work has developed concurrent reuse distance
(CRD) and private reuse distance (PRD) profiles to
enable analysis of shared and private caches. Also,
techniques have been developed to predict profiles
across problem size and core count, enabling the
analysis of configurations that are too large to
simulate. This paper applies RD analysis to study the
scalability of multicore cache hierarchies. We present
a framework based on CRD and PRD profiles for reasoning
about the locality impact of core count and problem
scaling. We find interference-based locality
degradation is more significant than sharing-based
locality degradation. For 256 cores running small
problems, the former occurs at small cache sizes,
allowing moderate capacity scaling of multicore caches
to achieve the same cache performance (MPKI) as a
single-core cache. At very large problems,
interference-based locality degradation increases
significantly in many of our benchmarks. For shared
caches, this prevents most of our benchmarks from
achieving constant-MPKI scaling within a 256 MB
capacity budget; for private caches, all benchmarks
cannot achieve constant-MPKI scaling within 256 MB.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{DuBois:2013:CSI,
author = "Kristof {Du Bois} and Stijn Eyerman and Jennifer B.
Sartor and Lieven Eeckhout",
title = "Criticality stacks: identifying critical threads in
parallel programs using synchronization behavior",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "511--522",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485966",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Analyzing multi-threaded programs is quite
challenging, but is necessary to obtain good multicore
performance while saving energy. Due to
synchronization, certain threads make others wait,
because they hold a lock or have yet to reach a
barrier. We call these critical threads, i.e., threads
whose performance is determinative of program
performance as a whole. Identifying these threads can
reveal numerous optimization opportunities, for the
software developer and for hardware. In this paper, we
propose a new metric for assessing thread criticality,
which combines both how much time a thread is
performing useful work and how many co-running threads
are waiting. We show how thread criticality can be
calculated online with modest hardware additions and
with low overhead. We use our metric to create
criticality stacks that break total execution time into
each thread's criticality component, allowing for easy
visual analysis of parallel imbalance. To validate our
criticality metric, and demonstrate it is better than
previous metrics, we scale the frequency of the most
critical thread and show it achieves the largest
performance improvement. We then demonstrate the broad
applicability of criticality stacks by using them to
perform three types of optimizations: (1) program
analysis to remove parallel bottlenecks, (2)
dynamically identifying the most critical thread and
accelerating it using frequency scaling to improve
performance, and (3) showing that accelerating only the
most critical thread allows for targeted energy
reduction.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kurian:2013:LAA,
author = "George Kurian and Omer Khan and Srinivas Devadas",
title = "The locality-aware adaptive cache coherence protocol",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "523--534",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485967",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Next generation multicore applications will process
massive amounts of data with significant sharing. Data
movement and management impacts memory access latency
and consumes power. Therefore, harnessing data locality
is of fundamental importance in future processors. We
propose a scalable, efficient shared memory cache
coherence protocol that enables seamless adaptation
between private and logically shared caching of on-chip
data at the fine granularity of cache lines. Our
data-centric approach relies on in-hardware yet
low-overhead runtime profiling of the locality of each
cache line and only allows private caching for data
blocks with high spatio-temporal locality. This allows
us to better exploit the private caches and enable
low-latency, low-energy memory access, while retaining
the convenience of shared memory. On a set of parallel
benchmarks, our low-overhead locality-aware mechanisms
reduce the overall energy by 25\% and completion time
by 15\% in an NoC-based multicore with the
Reactive-NUCA on-chip cache organization and the
ACKwise limited directory-based coherence protocol.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kaxiras:2013:NPE,
author = "Stefanos Kaxiras and Alberto Ros",
title = "A new perspective for efficient virtual-cache
coherence",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "535--546",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485968",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Coherent shared virtual memory (cSVM) is highly
coveted for heterogeneous architectures as it will
simplify programming across different cores and
manycore accelerators. In this context, virtual L1
caches can be used to great advantage, e.g., saving
energy consumption by eliminating address translation
for hits. Unfortunately, multicore virtual-cache
coherence is complex and costly because it requires
reverse translation for any coherence request directed
towards a virtual L1. The reason is the ambiguity of
the virtual address due to the possibility of synonyms.
In this paper, we take a radically different approach
than all prior work which is focused on reverse
translation. We examine the problem from the
perspective of the coherence protocol. We show that if
a coherence protocol adheres to certain conditions, it
operates effortlessly with virtual caches, without
requiring reverse translations even in the presence of
synonyms. We show that these conditions hold in a new
class of simple and efficient request-response
protocols that use both self-invalidation and
self-downgrade. This results in a new solution for
virtual-cache coherence, significantly less complex and
more efficient than prior proposals. We study design
choices for TLB placement under our proposal and
compare them against those under a directory-MESI
protocol. Our approach allows for choices that are
particularly effective as for example combining all
per-core TLBs in a single logical TLB in front of the
last level cache. Significant area, energy, and
performance benefits ensue as a result of simplifying
the entire multicore memory organization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhao:2013:PAG,
author = "Hongzhou Zhao and Arrvindh Shriraman and Snehasish
Kumar and Sandhya Dwarkadas",
title = "{Protozoa}: adaptive granularity cache coherence",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "547--558",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485969",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "State-of-the-art multiprocessor cache hierarchies
propagate the use of a fixed granularity in the cache
organization to the design of the coherence protocol.
Unfortunately, the fixed granularity, generally chosen
to match average spatial locality across a range of
applications, not only results in wasted bandwidth to
serve an individual thread's access needs, but also
results in unnecessary coherence traffic for shared
data. The additional bandwidth has a direct impact on
both the scalability of parallel applications and
overall energy consumption. In this paper, we present
the design of Protozoa, a family of coherence protocols
that eliminate unnecessary coherence traffic and match
data movement to an application's spatial locality.
Protozoa continues to maintain metadata at a
conventional fixed cache line granularity while (1)
supporting variable read and write caching granularity
so that data transfer matches application spatial
granularity, (2) invalidating at the granularity of the
write miss request so that readers to disjoint data can
co-exist with writers, and (3) potentially supporting
multiple non-overlapping writers within the cache line,
thereby avoiding the traditional ping-pong effect of
both read-write and write-write false sharing. Our
evaluation demonstrates that Protozoa consistently
reduce miss rate and improve the fraction of
transmitted data that is actually utilized.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Demme:2013:FOM,
author = "John Demme and Matthew Maycock and Jared Schmitz and
Adrian Tang and Adam Waksman and Simha Sethumadhavan
and Salvatore Stolfo",
title = "On the feasibility of online malware detection with
performance counters",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "559--570",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485970",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "The proliferation of computers in any domain is
followed by the proliferation of malware in that
domain. Systems, including the latest mobile platforms,
are laden with viruses, rootkits, spyware, adware and
other classes of malware. Despite the existence of
anti-virus software, malware threats persist and are
growing as there exist a myriad of ways to subvert
anti-virus (AV) software. In fact, attackers today
exploit bugs in the AV software to break into systems.
In this paper, we examine the feasibility of building a
malware detector in hardware using existing performance
counters. We find that data from performance counters
can be used to identify malware and that our detection
techniques are robust to minor variations in malware
programs. As a result, after examining a small set of
variations within a family of malware on Android ARM
and Intel Linux platforms, we can detect many
variations within that family. Further, our proposed
hardware modifications allow the malware detector to
run securely beneath the system software, thus setting
the stage for AV implementations that are simpler and
less buggy than software AV. Combined, the robustness
and security of hardware AV techniques have the
potential to advance state-of-the-art online malware
detection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ren:2013:DSE,
author = "Ling Ren and Xiangyao Yu and Christopher W. Fletcher
and Marten van Dijk and Srinivas Devadas",
title = "Design space exploration and optimization of path
oblivious {RAM} in secure processors",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "571--582",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485971",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Keeping user data private is a huge problem both in
cloud computing and computation outsourcing. One
paradigm to achieve data privacy is to use
tamper-resistant processors, inside which users'
private data is decrypted and computed upon. These
processors need to interact with untrusted external
memory. Even if we encrypt all data that leaves the
trusted processor, however, the address sequence that
goes off-chip may still leak information. To prevent
this address leakage, the security community has
proposed ORAM (Oblivious RAM). ORAM has mainly been
explored in server/file settings which assume a vastly
different computation model than secure processors. Not
surprisingly, na{\"\i}vely applying ORAM to a secure
processor setting incurs large performance overheads.
In this paper, a recent proposal called Path ORAM is
studied. We demonstrate techniques to make Path ORAM
practical in a secure processor setting. We introduce
background eviction schemes to prevent Path ORAM
failure and allow for a performance-driven design space
exploration. We propose a concept called super blocks
to further improve Path ORAM's performance, and also
show an efficient integrity verification scheme for
Path ORAM. With our optimizations, Path ORAM overhead
drops by 41.8\%, and SPEC benchmark execution time
improves by 52.4\% in relation to a baseline
configuration. Our work can be used to improve the
security level of previous secure processors.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wassel:2013:SLL,
author = "Hassan M. G. Wassel and Ying Gao and Jason K. Oberg
and Ted Huffmire and Ryan Kastner and Frederic T. Chong
and Timothy Sherwood",
title = "{SurfNoC}: a low latency and provably non-interfering
approach to secure networks-on-chip",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "583--594",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485972",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "As multicore processors find increasing adoption in
domains such as aerospace and medical devices where
failures have the potential to be catastrophic, strong
performance isolation and security become first-class
design constraints. When cores are used to run separate
pieces of the system, strong time and space
partitioning can help provide such guarantees. However,
as the number of partitions or the asymmetry in
partition bandwidth allocations grows, the additional
latency incurred by time multiplexing the network can
significantly impact performance. In this paper, we
introduce SurfNoC, an on-chip network that
significantly reduces the latency incurred by temporal
partitioning. By carefully scheduling the network into
waves that flow across the interconnect, data from
different domains carried by these waves are strictly
non-interfering while avoiding the significant
overheads associated with cycle-by-cycle time
multiplexing. We describe the scheduling policy and
router microarchitecture changes required, and evaluate
the information-flow security of a synthesizable
implementation through gate-level information flow
analysis. When comparing our approach for varying
numbers of domains and network sizes, we find that in
many cases SurfNoC can reduce the latency overhead of
implementing cycle-level non-interference by up to
85\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2013:VPD,
author = "Di Wang and Chuangang Ren and Anand Sivasubramaniam",
title = "Virtualizing power distribution in datacenters",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "595--606",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485973",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Power infrastructure contributes to a significant
portion of datacenter expenditures. Overbooking this
infrastructure for a high percentile of the needs is
becoming more attractive than for occasional peaks.
There exist several computing knobs to cap the power
draw within such under-provisioned capacity. Recently,
batteries and other energy storage devices have been
proposed to provide a complementary alternative to
these knobs, which when decentralized (or
hierarchically placed), can temporarily take the load
to suppress power peaks propagating up the hierarchy.
With aggressive under-provisioning, the power hierarchy
becomes as central a datacenter resource as other
computing resources, making it imperative to carefully
allocate, isolate and manage this resource (including
batteries), across applications. Towards this goal, we
present vPower, a software system to virtualize power
distribution. vPower includes mechanisms and policies
to provide a virtual power hierarchy for each
application. It leverages traditional computing knobs
as well as batteries, to apportion and manage the
infrastructure between co-existing applications in the
hierarchy. vPower allows applications to specify their
power needs, performs admission control and placement,
dynamically monitors power usage, and enforces
allocations for fairness and system efficiency. Using
several datacenter applications, and a 2-level power
hierarchy prototype containing batteries at both
levels, we demonstrate the effectiveness of vPower when
working in an under-provisioned power infrastructure,
using the right computing knobs and the right batteries
at the right time. Results show over 50\% improved
system utilization and scale-out for vPower's
over-booking, and between 12--28\% better application
performance than traditional power-capping control
knobs. It also ensures isolation between applications
competing for power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yang:2013:BFP,
author = "Hailong Yang and Alex Breslow and Jason Mars and
Lingjia Tang",
title = "{Bubble-Flux}: precise online {QoS} management for
increased utilization in warehouse scale computers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "607--618",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485974",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Ensuring the quality of service (QoS) for
latency-sensitive applications while allowing
co-locations of multiple applications on servers is
critical for improving server utilization and reducing
cost in modern warehouse-scale computers (WSCs). Recent
work relies on static profiling to precisely predict
the QoS degradation that results from performance
interference among co-running applications to increase
the number of ``safe'' co-locations. However, these
static profiling techniques have several critical
limitations: (1) a priori knowledge of all workloads is
required for profiling, (2) it is difficult for the
prediction to capture or adapt to phase or load changes
of applications, and (3) the prediction technique is
limited to only two co-running applications. To address
all of these limitations, we present Bubble-Flux, an
integrated dynamic interference measurement and online
QoS management mechanism to provide accurate QoS
control and maximize server utilization. Bubble-Flux
uses a Dynamic Bubble to probe servers in real time to
measure the instantaneous pressure on the shared
hardware resources and precisely predict how the QoS of
a latency-sensitive job will be affected by potential
co-runners. Once ``safe'' batch jobs are selected and
mapped to a server, Bubble-Flux uses an Online Flux
Engine to continuously monitor the QoS of the
latency-sensitive application and control the execution
of batch jobs to adapt to dynamic input, phase, and
load changes to deliver satisfactory QoS. Batch
applications remain in a state of flux throughout
execution. Our results show that the utilization
improvement achieved by Bubble-Flux is up to 2.2x
better than the prior static approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Mars:2013:WMH,
author = "Jason Mars and Lingjia Tang",
title = "{Whare-Map}: heterogeneity in ``homogeneous''
warehouse-scale computers",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "619--630",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485975",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "Modern ``warehouse scale computers'' (WSCs) continue
to be embraced as homogeneous computing platforms.
However, due to frequent machine replacements and
upgrades, modern WSCs are in fact composed of diverse
commodity microarchitectures and machine
configurations. Yet, current WSCs are architected with
the assumption of homogeneity, leaving a potentially
significant performance opportunity unexplored. In this
paper, we expose and quantify the performance impact of
the ``homogeneity assumption'' for modern production
WSCs using industry-strength large-scale web-service
workloads. In addition, we argue for, and evaluate the
benefits of, a heterogeneity-aware WSC using commercial
web-service production workloads including Google's
web-search. We also identify key factors impacting the
available performance opportunity when exploiting
heterogeneity and introduce a new metric, opportunity
factor, to quantify an application's sensitivity to the
heterogeneity in a given WSC. To exploit heterogeneity
in ``homogeneous'' WSCs, we propose ``Whare-Map,'' the
WSC Heterogeneity Aware Mapper that leverages
already in-place continuous profiling subsystems found
in production environments. When employing
``Whare-Map'', we observe a cluster-wide performance
improvement of 15\% on average over
heterogeneity-oblivious job placement and up to an 80\%
improvement for web-service applications that are
particularly sensitive to heterogeneity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Foutris:2013:DMA,
author = "Nikos Foutris and Dimitris Gizopoulos and Xavier Vera
and Antonio Gonzalez",
title = "Deconfigurable microprocessor architectures for
silicon debug acceleration",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "631--642",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485976",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "The share of silicon debug in the overall
microprocessor chips development cycle is rapidly
expanding due to the ever growing design complexity and
the limited efficiency of pre-silicon validation
methods. Massive application of short random test
programs on the prototype microprocessor chips is one
of the most effective parts of silicon debug. However,
a major bottleneck and source of ``noise'' in this
phase is that large numbers of random test programs
fail due to the same or similar design bugs. This
redundant behavior adds long delays in the debug flow
since each failing random program must be separately
examined, although it does not usually bring new debug
information. The development of effective techniques
that detect dominant modes of failure among random
programs and triage them into common categories
eliminate redundant debug sessions and significantly
boost silicon debug. We propose the employment of
deconfigurable microprocessor architectures along with
self-checking random test programs to reduce the
redundant debug sessions and make the triage step of
silicon debug more efficient. Several hardware
components of high performance microprocessor
micro-architectures can be deconfigured while keeping
the functional completeness of the design. This is the
property we exploit in our silicon debug methodology
for the triaging of random test programs. We support
our methodology by a hardware mechanism dedicated to
silicon debug that groups the failing test programs
into categories depending on the microprocessor
hardware components that need to be deconfigured for a
random test program to be correctly executed. Identical
deconfiguration sequences for multiple test programs
indicate the existence of redundancy among them and
group them together. This grouping significantly
reduces the number of failing tests that must be
debugged afterwards. Detailed evaluation of the method
on an x86 microprocessor demonstrates its efficiency in
reducing the debug sessions and thus in accelerating
silicon debug.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Pokam:2013:QPI,
author = "Gilles Pokam and Klaus Danne and Cristiano Pereira and
Rolf Kassa and Tim Kranich and Shiliang Hu and Justin
Gottschlich and Nima Honarmand and Nathan Dautenhahn
and Samuel T. King and Josep Torrellas",
title = "{QuickRec}: prototyping an {Intel} architecture
extension for record and replay of multithreaded
programs",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "643--654",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485977",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "There has been significant interest in
hardware-assisted deterministic Record and Replay (RnR)
systems for multithreaded programs on multiprocessors.
However, no proposal has implemented this technique in
a hardware prototype with full operating system
support. Such an implementation is needed to assess RnR
practicality. This paper presents QuickRec, the first
multicore Intel Architecture (IA) prototype of RnR for
multithreaded programs. QuickRec is based on QuickIA,
an Intel emulation platform for rapid prototyping of
new IA extensions. QuickRec is composed of a Xeon
server platform with FPGA-emulated second-generation
Pentium cores, and Capo3, a full software stack for
managing the recording hardware from within a modified
Linux kernel. This paper's focus is understanding and
evaluating the implementation issues of RnR on a real
platform. Our effort leads to some lessons learned, as
well as to some pointers for future research. We
demonstrate that RnR can be implemented efficiently on
a real multicore IA system. In particular, we show that
the rate of memory log generation is insignificant, and
that the recording hardware has negligible performance
overhead. However, the software stack incurs an average
recording overhead of nearly 13\%, which must be
reduced to enable always-on use of RnR.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Huang:2013:NRC,
author = "Ruirui Huang and Erik Halberg and G. Edward Suh",
title = "Non-race concurrency bug detection through
order-sensitive critical sections",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "655--666",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485978",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "This paper introduces a new heuristic condition for
non-race concurrency bugs, named order-sensitive
critical sections, and proposes a run-time bug
detection scheme based on the condition. The
order-sensitive critical sections are defined as a pair
of critical sections that can lead to non-deterministic
shared memory state depending on the order in which
they execute. In a sense, the order-sensitive critical
sections can be seen as extending the intuition in
using data races as a potential bug condition to
capture non-race bugs. Experiments show that the
proposed scheme provides a good coverage for multiple
types of non-race bugs, with a small number of false
positives. For example, the scheme detected all 9
real-world non-race bugs that were tested as well as
over 90\% of injected non-race bugs. Additionally, this
paper presents an efficient hardware architecture that
supports the proposed scheme with minor hardware
changes and a small amount of additional state --- a
9-KB buffer per core and a 1-bit tag per data cache
block. The hardware-based scheme could still detect all
9 real-world bugs that were tested and more than 84\%
of the injected non-race bugs. Moreover, the hardware
supported scheme has a negligible impact on
performance, with a 0.23\% slowdown on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Maitra:2013:HEM,
  author          = "Subhashis Maitra and Amitabha Sinha",
  title           = "High efficiency {MAC} unit used in digital signal
                     processing and elliptic curve cryptography",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "4",
  pages           = "1--7",
  month           = sep,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2560488.2560490",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Thu Jan 2 17:25:55 MST 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Computational complexities of different Algorithms to
                     enhance the speed of response of digital signal
                     processor and different cryptographic analysis are the
                     important issues for the current researcher.
                     Computational complexities means hardware complexities
                     and timing complexities. Both the complexities depend
                     on the design of the software and hardware. Arithmetic
                     computation like addition and multiplication are the
                     major parts in designing processor that helps to
                     improve the efficiency and to reduce complexities.
                     Hence the design of a multiplier unit is the major
                     issue to the current researchers. There are different
                     multiplication algorithms discussed in different
                     research materials. In this paper, a new algorithm for
                     multiplication has been proposed to enhance the speed
                     of operation and to reduce hardware complexities. Also
                     a comparative study of the proposed algorithm over
                     different existing algorithms has been explained here
                     along with VHDL model of the proposed architecture.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Janjusic:2013:GMP,
  author          = "Tomislav Janjusic and Krishna Kavi",
  title           = "{Gleipnir}: a memory profiling and tracing tool",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "4",
  pages           = "8--12",
  month           = sep,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2560488.2560491",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Thu Jan 2 17:25:55 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "In this article we describe a memory tracing and
                     profiling tool called Gleipnir. Gleipnir is a plug-in
                     tool for a widely used binary instrumentation
                     framework, Valgrind. Gleipnir's ability to collect fine
                     grained memory traces and associate each access to
                     source level data structures and elements of these
                     structures, makes it a good candidate tool for advanced
                     memory analysis and studying complex memory access
                     patterns. The data provided by Gleipnir may be used by
                     cache simulators to analyze accesses to data structure
                     elements and understand the dynamic memory behavior of
                     programs. The goal of Gleipnir is to give the
                     programmer aid in refactoring data and code. In
                     addition to Gleipnir we introduce a cache simulation
                     tool, Gl cSim. Gl cSim is an extension to DineroIV (a
                     uni-processor simulator) that tracks Gleipnir provided
                     trace and debug-information.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2013:INb,
  author          = "Mark Thorson",
  title           = "{Internet} nuggets",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "4",
  pages           = "13--22",
  month           = sep,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2560488.2560493",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Thu Jan 2 17:25:55 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Godard:2013:MSS,
  author          = "Ivan Godard",
  title           = "The {Mill}: split-stream encoding",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "1--5",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641363",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Real-world programs often thrash in the instruction
                     cache, especially when SMT methods are used. The Mill\TM{}
                     split-stream encoding doubles the effective capacity of
                     the instruction cache at no increase in per-instruction
                     power usage or cache access latency, while also sharply
                     increasing the potential maximal decode rate for
                     instruction sets that use variable-length encoding.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:2013:DAM,
  author          = "Alexander Thomasian",
  title           = "Disk arrays with multiple {RAID} levels",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "6--24",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641364",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "We propose Heterogeneous Disk Arrays (HDAs), which
                     allow multiple RAID levels for database applications to
                     coexist in a single disk array accommodating multiple
                     RAID levels. Our main concern is to efficiently utilize
                     disk bandwidth and capacity, while balancing disk loads
                     in a cloud storage environment, however, a small number
                     of disks is considered in this study for illustrative
                     purposes. Individual RAID levels are adjusted to data
                     availability requirements and workload demands.
                     Adopting the most stringent availability requirements
                     for all datasets would incur unnecessarily high
                     bandwidth overhead for updating datasets, which do not
                     have this requirement. Intermixing RAID levels is
                     beneficial from the viewpoint of balancing disk loads,
                     similarly to the striping paradigm in RAID5. The
                     suitability of the RAID levels varies with database
                     applications: RAID5 --- reading/writing large datasets
                     for data mining and warehousing, RAID1 --- high
                     performance OLTP applications. Several single pass data
                     allocation methods are proposed in this paper and
                     compared using synthetically generated allocation
                     requests in associated papers.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Maitra:2013:DSM,
  author          = "Subhashis Maitra and Amitabha Sinha",
  title           = "Design and simulation of {MAC} unit using
                     combinational circuit and adder",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "25--33",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641365",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Hardware and timing complexities of MAC unit to
                     perform arithmetic operation like addition or
                     multiplication especially in the field of Digital
                     Signal Processing (DSP) or Elliptic Curve Cryptography
                     (ECC) are the major issues to the designer. The
                     multiplication operation is essential and abundant in
                     DSP Applications. In order to achieve maximum
                     implementation efficiency and timing performance,
                     designing a DSP systems is critical and frequently
                     presents a significant challenge to hardware engineers.
                     There are certain multipliers that simplify this
                     challenge by abstracting away FPGA device specifics,
                     while maintaining the required maximum performance and
                     resource efficiency. These multipliers are able to
                     perform parallel multiplication and hence constant
                     coefficient multiplication, both with differing
                     implementation styles. Again with the aid of
                     instantaneous resource estimation, hardware engineers
                     can rapidly select the optimal solution for their
                     system. The latest additions to the IP provide fine
                     control over the latency using the concept of
                     pipelining of the multipliers that are purely
                     combinatorial to be fully pipelined. Here a new
                     compensation method that reduces both the hardware and
                     timing complexities of the multiplier used for DSP
                     application or ECC application has been proposed. The
                     design of the MAC unit based on the proposed
                     compensation method has been dealt here properly using
                     Xilinx 13.2 and compared with array multiplier, Booth
                     multiplier and Vedic multiplier to show its novelty
                     over them. The hardware complexity is reduced to about
                     60\% of the original multiplier. Design results show
                     that the proposed architecture has lower hardware
                     overhead, lower error and fast operating speed as
                     compared with array, Booth and Vedic multiplier.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Chau:2013:ASM,
  author          = "Thomas C. P. Chau and James S. Targett and Marlon
                     Wijeyasinghe and Wayne Luk and Peter Y. K. Cheung and
                     Benjamin Cope and Alison Eele and Jan Maciejowski",
  title           = "Accelerating sequential {Monte Carlo} method for
                     real-time air traffic management",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "35--40",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641367",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "This paper presents how field-programmable gate arrays
                     (FPGAs) are used to accelerate the Sequential Monte
                     Carlo method for air traffic management. A novel data
                     structure is introduced for a particle stream that
                     enables efficient evaluation of constraints and
                     weights. A parallel implementation for this streaming
                     data structure is designed, and an analytical model is
                     provided for estimating the performance and resource
                     usage of our implementation. We compare our design to
                     implementations on CPU and GPU. We show 9.3 times speed
                     up and 89 times improvement in energy efficiency over
                     an Intel Core i7-950 CPU with 8 threads and demonstrate
                     1.3 times speed up and 13.5 times improvement in energy
                     efficiency over an NVIDIA Tesla C2070 GPU with 448
                     cores. We also estimate the performance of FPGA in
                     future scenario and show that FPGA is able to control
                     15 times and 2.8 times more aircraft than CPU and GPU
                     in real-time respectively.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Mahram:2013:NBC,
  author          = "Atabak Mahram and Martin C. Herbordt",
  title           = "{NCBI BLASTP} on the {Convey HC1-EX}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "41--46",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641368",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "The BLAST sequence alignment program is a central
                     application in bioinformatics. The de facto standard
                     version, NCBI BLAST, uses complex heuristics which make
                     it challenging to simultaneously achieve both high
                     performance and exact agreement. In previous work, a
                     system that used novel FPGA-based filters reduced the
                     input database by over 99.97\% without loss of
                     sensitivity. In the present work we report experiences
                     in getting from a prototype to a potential product for
                     the Convey HC1-EX. There are several issues. The first
                     is the efforts made to maintain timing for a highly
                     complex configuration as it is optimized by including
                     additional filter stages. This requires implementation
                     and optimization of new interface logic as well as
                     floor-planning. The second is the system-level
                     tradeoffs necessary to maintain correctness. The issue
                     here is preventing low frequency events, which
                     necessarily cannot be mapped to the FPGA, from diluting
                     the performance benefits without sacrificing
                     sensitivity. We present results for various usage
                     scenarios and find a factor of nearly 5x speed-up over
                     a fully parallel implementation of the reference code
                     on a contemporaneous CPU. We believe that the resulting
                     system is the leading accelerated NCBI BLAST. The
                     significance of this work is that, while such in-depth
                     work is necessary to achieve high performance for
                     complex systems, these issues are rarely described in
                     the academic literature.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Sano:2013:ECC,
  author          = "Kentaro Sano and Yoshiaki Kono and Hayato Suzuki and
                     Ryotaro Chiba and Ryo Ito and Tomohiro Ueno and Kyo
                     Koizumi and Satoru Yamamoto",
  title           = "Efficient custom computing of fully-streamed lattice
                     {Boltzmann} method on tightly-coupled {FPGA} cluster",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "47--52",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641369",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "This paper presents the detailed design of a custom
                     computing machine for fully-streamed LBM computation on
                     multiple FPGAs, and evaluates its efficiency with
                     prototype implementation. We design a unit for
                     completely streamed computation including boundary
                     treatment with a newly introduced cell attribute.
                     Experimental results demonstrate that the proposed
                     machine achieves high utilization of PEs, 99 \% of the
                     peak performance, for one and two FPGAs computing a
                     large lattice. This is due to our fully-streamed design
                     to allow all arithmetic units to be efficiently
                     utilized with a constant memory bandwidth, and the
                     architecture to exploit a low-latency accelerator
                     domain network (ADN) of a tightly-coupled FPGA cluster
                     for scalable computation.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Vanderbauwhede:2013:HCF,
  author          = "Wim Vanderbauwhede and Anton Frolov and Sai Rahul
                     Chalamalasetti and Martin Margala",
  title           = "A hybrid {CPU--FPGA} system for high throughput
                     {(10Gb/s)} streaming document classification",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "53--58",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641370",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Processing large volumes of information in real time
                     requires large amounts of computational power, which
                     consumes a significant amount of energy. With the rise
                     in the amount of data produced, energy-efficient
                     high-performance information processing systems are
                     becoming a necessity. We present a hybrid CPU-FPGA
                     system for high-throughput classification of streams of
                     textual documents (e.g. emails or web pages). The
                     current system parses the document stream using a
                     multicore CPU and performs classification on the parsed
                     stream using Field-Programmable Gate Arrays (FPGAs). As
                     an example, we demonstrate a Naive Bayes classifier on
                     the TREC Aquaint dataset. Our current solution can
                     classify 10Gb/s internet traffic in real time. Our aim
                     is to increase the throughput to 100Gb/s by
                     incorporating the parser into the FPGA design.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Guo:2013:CPE,
  author          = "Ce Guo and Wayne Luk and Ekaterina Vinkovskaya and
                     Rama Cont",
  title           = "Customisable pipelined engine for intensity evaluation
                     in multivariate {Hawkes} point processes",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "59--64",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641371",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Hawkes processes are point processes that can be used
                     to build probabilistic models to capture occurrence
                     patterns of random events. They are widely used in
                     high-frequency trading, seismic analysis and
                     neuroscience. A critical calculation in Hawkes process
                     models is intensity evaluation. The intensity of a
                     point process represents the instantaneous rate of
                     occurrence of events, but it is computationally
                     expensive and challenging to calculate efficiently in
                     order to make predictions using Hawkes process models.
                     To accelerate the computation, we analyse data
                     dependency in the intensity evaluation routine, and
                     present a strategy to enable multiple intensities to be
                     computed with a single pass through the data. We then
                     design and optimise a pipelined hardware engine based
                     on our strategy. In our experiments, an FPGA-based
                     implementation of the proposed engine is evaluated by
                     four case studies. This implementation achieves up to
                     94 times speedup over an optimised CPU implementation
                     with one core, and 12 times speedup over a CPU with
                     eight cores.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Giefers:2013:AFD,
  author          = "Heiner Giefers and Christian Plessl and Jens
                     F{\"o}rstner",
  title           = "Accelerating finite difference time domain simulations
                     with reconfigurable dataflow computers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "65--70",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641372",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Finite difference methods are widely used, highly
                     parallel algorithms for solving differential equations.
                     However, the algorithms are memory bound and thus
                     difficult to implement efficiently on CPUs or GPUs. In
                     this work we study the implementation of the finite
                     difference time domain (FDTD) method for solving
                     Maxwell's equations on an FPGA-based Maxeler dataflow
                     computer. We evaluate our work with actual problems
                     from the domain of computational nanophotonics. The use
                     of realistic simulations requires us to pay special
                     attention to boundary conditions (Dirichlet, periodic,
                     absorbing), which are critical for the correctness of
                     results but detrimental to the performance and thus
                     frequently neglected. We discuss and evaluate the
                     design of two different FDTD implementations, which
                     outperform CPU and GPU implementations. To our
                     knowledge, our implementation is the fastest FPGA-based
                     FDTD solver.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Ogawa:2013:RJA,
  author          = "Yuki Ogawa and Masahiro Iida and Motoki Amagasaki and
                     Morihiro Kuga and Toshinori Sueyoshi",
  title           = "A reconfigurable {Java} accelerator with software
                     compatibility for embedded systems",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "71--76",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641373",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Ohkawa:2013:RHO,
  author          = "Takeshi Ohkawa and Daichi Uetake and Takashi Yokota
                     and Kanemitsu Ootsu and Takanobu Baba",
  title           = "Reconfigurable and hardwired {ORB} engine on {FPGA} by
                     {Java-to-HDL} synthesizer for realtime application",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "77--82",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641374",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "A platform for networked FPGA system design, which is
                     named ``ORB Engine'', is proposed to add more
                     controllability and design productivity on FPGA-based
                     systems composed of software and hardwired IPs. A
                     developer can define an object-oriented interface for
                     the circuit IP in FPGA, and implement the control
                     sequence part using Java. The circuit IP in FPGA can be
                     handled through object-oriented interface from variety
                     of programming languages like C++, Java, Python, Ruby
                     and so on. Application specific and high-efficiency
                     circuit for ORB (Object Request Broker) protocol
                     processing is synthesized from easy-handling Java code
                     using JavaRock Java-to-HDL synthesizer within the
                     de-facto standard CORBA (Common Object Request Broker
                     Architecture). The measurement result shows a very low
                     latency as low as 200us of UDP/IP packet in/out and
                     exhibits a fluctuation free delay performance, which is
                     desirable for real-time applications.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{deDinechin:2013:FPT,
  author          = "Florent de Dinechin and Matei Istoan and Guillaume
                     Sergent",
  title           = "Fixed-point trigonometric functions on {FPGAs}",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "83--88",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641375",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                     https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Three approaches for computing sines and cosines on
                     FPGAs are studied in this paper, with a focus of
                     high-throughput pipelined architecture, and
                     state-of-the-art implementation techniques. The first
                     approach is the classical CORDIC iteration, for which
                     we suggest a reduced iteration technique and fine
                     optimizations in datapath width and latency. The second
                     is an ad-hoc architecture specifically designed around
                     trigonometric identities. The third uses a generic
                     table- and DSP-based polynomial approximator. These
                     three architectures are implemented and compared in the
                     FloPoCo framework.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Tada:2013:PED,
  author          = "Jubee Tada",
  title           = "Performance evaluation of {$3$-D} stacked {$ 32 $-bit}
                     parallel multipliers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "89--94",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641376",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Conventional two-dimensional (2-D) implementation
                     technologies face certain limitations; to overcome
                     these limitations, three-dimensional (3-D) integration
                     technologies have been developed. There has been a
                     focus on circuit partitioning strategies because they
                     play an important role in exploiting the potential of
                     3-D stacked circuits. The Middle-Grain circuit
                     partitioning strategy has been proposed to exploit the
                     potential of 3-D stacked circuits. The proposed
                     strategy equalizes the area of each layer and avoids
                     the critical paths across different layers as much as
                     possible. In this study, 3-D stacked parallel
                     multipliers are designed using various circuit
                     partitioning strategies. Experimental results
                     demonstrate that the 3-D stacked 32-bit parallel
                     multiplier, designed using the proposed strategy,
                     achieves a 27\% delay reduction as compared to the 2-D
                     implementation.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Tanaka:2013:USP,
  author          = "Yuichiroh Tanaka and Shimpei Sato and Kenji Kise",
  title           = "The {UltraSmall} soft processor",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "95--100",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641377",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "A soft processor is a processor that is implemented
                     using logic synthesis mainly targeting programmable
                     logic device like FPGA and it becomes a common
                     component for FPGA designs. The supersmall soft
                     processor (small-core) developed at University of
                     Toronto is a unique soft processor because its main
                     concern is very low hardware cost while supporting
                     32-bit ISA. With the same concept as small-core, we are
                     developing the ultrasmall soft processor (UltraSmall)
                     based on smallcore. The goal of this project is to
                     implement the smallest 32-bit ISA soft processor while
                     aiming to achieve high performance. We propose
                     UltraSmall and describe its key ideas and
                     implementations. The evaluation results indicate that
                     the hardware cost of UltraSmall is smaller than
                     smallcore in the latest FPGA while achieving 1.8x
                     performance of small-core.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Guo:2013:CAS,
  author          = "Liucheng Guo and David B. Thomas and Wayne Luk",
  title           = "Customisable architectures for the set covering
                     problem",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "101--106",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641378",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "This paper proposes novel customisable streaming
                     architectures for the NP-hard set covering problem. Our
                     approach covers both exhaustive and genetic algorithms,
                     supporting coarse-grain parallelism and deep pipelines
                     while allowing trade-offs between performance and
                     resource usage. Experiments targeting Maxeler systems
                     show that our FPGA-based designs are more effective
                     than the corresponding multicore software versions. The
                     speed up of the exhaustive algorithm exceeds 250 times,
                     and that of the genetic algorithm exceeds 60 times.
                     Meanwhile, our implementations are more flexible than
                     other FPGA solutions, allowing users to customise
                     parameters at run time without recompilation.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Plumbridge:2013:BPR,
  author          = "Gary Plumbridge and Jack Whitham and Neil Audsley",
  title           = "{Blueshell}: a platform for rapid prototyping of
                     multiprocessor {NoCs} and accelerators",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "107--112",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641379",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "The rapid increase in FPGA logic capacity has enabled
                     the prototyping of multiprocessor Network-on-Chip (NoC)
                     architectures. However, the design space exploration of
                     these complex architectures is highly time consuming
                     with traditional methodologies for FPGA design. Our
                     paper addresses the challenges of multiprocessor
                     network design with the Blueshell framework for
                     generating multiprocessor networks on chip (NoC) and a
                     coupled Java software stack, Network-Chi. With
                     Blueshell hardware is constructed from high-level
                     components including processors and routers using
                     concise Bluespec System Verilog. The Network-Chi
                     software framework is also presented to enable
                     programming the on-chip processors in a familiar Java
                     style and without exposing the low-level systems
                     programming to the application designer. We demonstrate
                     that Blueshell systems with as many as 20 processors
                     can be implemented on a modestly sized FPGA.
                     Performance figures for a selection of distributed
                     applications are also provided.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Hong:2013:RTR,
  author          = "Chuan Hong and Khaled Benkrid and Nazrin Isa and
                     Xabier Iturbe",
  title           = "A run-time reconfigurable system for adaptive high
                     performance efficient computing",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "113--118",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641380",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Field programmable hardware gives electronic systems
                     the ability to be reconfigured at run time. This allows
                     electronic systems to be more efficiently customized on
                     demand and on-the-fly depending on user requirements
                     and environmental changes. This paper presents a
                     run-time reconfigurable system that allows computing
                     tasks to adjust their sizes in response to current
                     available resources, optimizing the overall performance
                     by maximally exploiting all the resources on the chip.
                     In particular, we present a novel run-time task
                     assembler, which assembles tasks with desired
                     parameters on-the-fly, together with an efficacious
                     run-time task placer to rapidly configure tasks at
                     optimum locations. The system is demonstrated with a
                     dynamic programming-based pairwise sequence alignment
                     application. Real hardware implementation result shows
                     that our run-time reconfigurable system optimizes
                     resource usage on the fly by $ \sim $3x, while matching the
                     performance of carefully hand-crafted static
                     solutions.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2013:INc,
  author          = "Mark Thorson",
  title           = "{Internet} nuggets",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "41",
  number          = "5",
  pages           = "119--127",
  month           = dec,
  year            = "2013",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2641361.2641382",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Mon Aug 18 17:12:43 MDT 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
}
%%% NOTE(review): this title is the ASPLOS '14 keynote, which is believed to
%%% have been delivered by Brad Calder, with Al Davis acting as session chair.
%%% TODO: confirm the author field against the DOI landing page before
%%% changing it; the citation label depends on the author name.
@Article{Davis:2014:IWA,
author = "Al Davis",
title = "Inside {Windows Azure}: the challenges and
opportunities of a cloud operating system",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "1--2",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2560008",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Cloud operating systems provide on-demand, scalable
compute and storage resources. They allow service
developers to focus on their business logic by
simplifying many portions of their service, including
resource management, provisioning, monitoring, and
application lifecycle management. This talk describes
some of the technical challenges faced, as well as
emergent opportunities created, by Microsoft's cloud
operating system Windows Azure.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Novakovic:2014:SN,
author = "Stanko Novakovic and Alexandros Daglis and Edouard
Bugnion and Babak Falsafi and Boris Grot",
title = "Scale-out {NUMA}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "3--18",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541965",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging datacenter applications operate on vast
datasets that are kept in DRAM to minimize latency. The
large number of servers needed to accommodate this
massive memory footprint requires frequent
server-to-server communication in applications such as
key-value stores and graph-based applications that rely
on large irregular data structures. The fine-grained
nature of the accesses is a poor match to commodity
networking technologies, including RDMA, which incur
delays of 10--1000$ \times $ over local DRAM
operations. We introduce Scale-Out NUMA (soNUMA) --- an
architecture, programming model, and communication
protocol for low-latency, distributed in-memory
processing. soNUMA layers an RDMA-inspired programming
model directly on top of a NUMA memory fabric via a
stateless messaging protocol. To facilitate
interactions between the application, OS, and the
fabric, soNUMA relies on the remote memory controller ---
a new architecturally-exposed hardware block integrated
into the node's local coherence hierarchy. Our results
based on cycle-accurate full-system simulation show
that soNUMA performs remote reads at latencies that are
within 4$ \times $ of local DRAM, can fully utilize the
available memory bandwidth, and can issue up to 10M
remote memory operations per second per core.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Agrawal:2014:RHD,
  author          = {Sandeep R. Agrawal and Valentin Pistol and Jun Pang
                     and John Tran and David Tarjan and Alvin R. Lebeck},
  title           = {{Rhythm}: harnessing data parallel hardware for server
                     workloads},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {19--34},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541956},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Trends in increasing web traffic demand an increase in
                     server throughput while preserving energy efficiency
                     and total cost of ownership. Present work in optimizing
                     data center efficiency primarily focuses on the data
                     center as a whole, using off-the-shelf hardware for
                     individual servers. Server capacity is typically
                     increased by adding more machines, which is cheap,
                     though inefficient in the long run in terms of energy
                     and area. Our work builds on the observation that
                     server workload execution patterns are not completely
                     unique across multiple requests. We present a
                     framework---called Rhythm---for high throughput servers
                     that can exploit similarity across requests to improve
                     server performance and power/energy efficiency by
                     launching data parallel executions for request cohorts.
                     An implementation of the SPECWeb Banking workload using
                     Rhythm on NVIDIA GPUs provides a basis for evaluating
                     both software and hardware for future cohort-based
                     servers. Our evaluation of Rhythm on future server
                     platforms shows that it achieves 4x the throughput
                     (reqs/sec) of a core i7 at efficiencies (reqs/Joule)
                     comparable to a dual core ARM Cortex A9. A Rhythm
                     implementation that generates transposed responses
                     achieves 8x the i7 throughput while processing 2.5x
                     more requests/Joule compared to the A9.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Samadi:2014:PPB,
  author          = {Mehrzad Samadi and Davoud Anoushe Jamshidi and
                     Janghaeng Lee and Scott Mahlke},
  title           = {{Paraprox}: pattern-based approximation for data
                     parallel applications},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {35--50},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541948},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Approximate computing is an approach where reduced
                     accuracy of results is traded off for increased speed,
                     throughput, or both. Loss of accuracy is not
                     permissible in all computing domains, but there are a
                     growing number of data-intensive domains where the
                     output of programs need not be perfectly correct to
                     provide useful results or even noticeable differences
                     to the end user. These soft domains include multimedia
                     processing, machine learning, and data mining/analysis.
                     An important challenge with approximate computing is
                     transparency to insulate both software and hardware
                     developers from the time, cost, and difficulty of using
                     approximation. This paper proposes a software-only
                     system, Paraprox, for realizing transparent
                     approximation of data-parallel programs that operates
                     on commodity hardware systems. Paraprox starts with a
                     data-parallel kernel implemented using OpenCL or CUDA
                     and creates a parameterized approximate kernel that is
                     tuned at runtime to maximize performance subject to a
                     target output quality (TOQ) that is supplied by the
                     user. Approximate kernels are created by recognizing
                     common computation idioms found in data-parallel
                     programs (e.g., Map, Scatter/Gather, Reduction, Scan,
                     Stencil, and Partition) and substituting approximate
                     implementations in their place. Across a set of 13 soft
                     data-parallel applications with at most 10\% quality
                     degradation, Paraprox yields an average performance
                     gain of 2.7x on a NVIDIA GTX 560 GPU and 2.5x on an
                     Intel Core i7 quad-core processor compared to accurate
                     execution on each platform.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Bornholt:2014:UFO,
author = "James Bornholt and Todd Mytkowicz and Kathryn S.
McKinley",
title = "{Uncertain$<$T$>$}: a first-order type for uncertain
data",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "51--66",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541958",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging applications increasingly use estimates such
as sensor data (GPS), probabilistic models, machine
learning, big data, and human data. Unfortunately,
representing this uncertain data with discrete types
(floats, integers, and booleans) encourages developers
to pretend it is not probabilistic, which causes three
types of uncertainty bugs. (1) Using estimates as facts
ignores random error in estimates. (2) Computation
compounds that error. (3) Boolean questions on
probabilistic data induce false positives and
negatives. This paper introduces Uncertain$<$T$>$
\ldots{}. Whereas previous probabilistic programming
languages focus on experts, Uncertain$<$T$>$
\ldots{}",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Santos:2014:UAT,
author = "Nuno Santos and Himanshu Raj and Stefan Saroiu and
Alec Wolman",
title = "Using {ARM TrustZone} to build a trusted language
runtime for mobile applications",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "67--80",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541949",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents the design, implementation, and
evaluation of the Trusted Language Runtime (TLR), a
system that protects the confidentiality and integrity
of .NET mobile applications from OS security breaches.
TLR enables separating an application's
security-sensitive logic from the rest of the
application, and isolates it from the OS and other
apps. TLR provides runtime support for the secure
component based on a .NET implementation for embedded
devices. TLR reduces the TCB of an open source .NET
implementation by a factor of $ 78 $ with a tolerable
performance cost. The main benefit of the TLR is to
bring the developer benefits of managed code to trusted
computing. With the TLR, developers can build their
trusted components with the productivity benefits of
modern high level languages, such as strong typing and
garbage collection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Criswell:2014:VGP,
  author          = {John Criswell and Nathan Dautenhahn and Vikram Adve},
  title           = {{Virtual Ghost}: protecting applications from hostile
                     operating systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {81--96},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541986},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Applications that process sensitive data can be
                     carefully designed and validated to be difficult to
                     attack, but they are usually run on monolithic,
                     commodity operating systems, which may be less secure.
                     An OS compromise gives the attacker complete access to
                     all of an application's data, regardless of how well
                     the application is built. We propose a new system,
                     Virtual Ghost, that protects applications from a
                     compromised or even hostile OS. Virtual Ghost is the
                     first system to do so by combining compiler
                     instrumentation and run-time checks on operating system
                     code, which it uses to create ghost memory that the
                     operating system cannot read or write. Virtual Ghost
                     interposes a thin hardware abstraction layer between
                     the kernel and the hardware that provides a set of
                     operations that the kernel must use to manipulate
                     hardware, and provides a few trusted services for
                     secure applications such as ghost memory management,
                     encryption and signing services, and key management.
                     Unlike previous solutions, Virtual Ghost does not use a
                     higher privilege level than the kernel. Virtual Ghost
                     performs well compared to previous approaches; it
                     outperforms InkTag on five out of seven of the LMBench
                     microbenchmarks with improvements between 1.3x and
                     14.3x. For network downloads, Virtual Ghost experiences
                     a 45\% reduction in bandwidth at most for small files
                     and nearly no reduction in bandwidth for large files
                     and web traffic. An application we modified to use
                     ghost memory shows a maximum additional overhead of 5\%
                     due to the Virtual Ghost protections. We also
                     demonstrate Virtual Ghost's efficacy by showing how it
                     defeats sophisticated rootkit attacks.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Li:2014:SLH,
  author          = {Xun Li and Vineeth Kashyap and Jason K. Oberg and
                     Mohit Tiwari and Vasanth Ram Rajarathinam and Ryan
                     Kastner and Timothy Sherwood and Ben Hardekopf and
                     Frederic T. Chong},
  title           = {{Sapper}: a language for hardware-level security
                     policy enforcement},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {97--112},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541947},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Privacy and integrity are important security concerns.
                     These concerns are addressed by controlling information
                     flow, i.e., restricting how information can flow
                     through a system. Most proposed systems that restrict
                     information flow make the implicit assumption that the
                     hardware used by the system is fully ``correct'' and
                     that the hardware's instruction set accurately
                     describes its behavior in all circumstances. The truth
                     is more complicated: modern hardware designs defy
                     complete verification; many aspects of the timing and
                     ordering of events are left totally unspecified; and
                     implementation bugs present themselves with surprising
                     frequency. In this work we describe Sapper, a novel
                     hardware description language for designing
                     security-critical hardware components. Sapper seeks to
                     address these problems by using static analysis at
                     compile-time to automatically insert dynamic checks in
                     the resulting hardware that provably enforce a given
                     information flow policy at execution time. We present
                     Sapper's design and formal semantics along with a proof
                     sketch of its security. In addition, we have
                     implemented a compiler for Sapper and used it to create
                     a non-trivial secure embedded processor with many
                     modern microarchitectural features. We empirically
                     evaluate the resulting hardware's area and energy
                     overhead and compare them with alternative designs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Banabic:2014:FTM,
  author          = {Radu Banabic and George Candea and Rachid Guerraoui},
  title           = {Finding {Trojan} message vulnerabilities in
                     distributed systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {113--126},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541984},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Trojan messages are messages that seem correct to the
                     receiver but cannot be generated by any correct sender.
                     Such messages constitute major vulnerability points of
                     a distributed system---they constitute ideal targets
                     for a malicious actor and facilitate failure
                     propagation across nodes. We describe Achilles, a tool
                     that searches for Trojan messages in a distributed
                     system. Achilles uses dynamic white-box analysis on the
                     distributed system binaries in order to infer the
                     predicate that defines messages parsed by receiver
                     nodes and generated by sender nodes, respectively, and
                     then computes Trojan messages as the difference between
                     the two. We apply Achilles on implementations of real
                     distributed systems: FSP, a file transfer application,
                     and PBFT, a Byzantine-fault-tolerant state machine
                     replication library. Achilles discovered a new bug in
                     FSP and rediscovered a previously known vulnerability
                     in PBFT. In our evaluation we demonstrate that our
                     approach can perform orders of magnitude better than
                     general approaches based on regular fuzzing and
                     symbolic execution.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Delimitrou:2014:QRE,
  author          = {Christina Delimitrou and Christos Kozyrakis},
  title           = {{Quasar}: resource-efficient and {QoS}-aware cluster
                     management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {127--144},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541941},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Cloud computing promises flexibility and high
                     performance for users and high cost-efficiency for
                     operators. Nevertheless, most cloud facilities operate
                     at very low utilization, hurting both cost
                     effectiveness and future scalability. We present
                     Quasar, a cluster management system that increases
                     resource utilization while providing consistently high
                     application performance. Quasar employs three
                     techniques. First, it does not rely on resource
                     reservations, which lead to underutilization as users
                     do not necessarily understand workload dynamics and
                     physical resource requirements of complex codebases.
                     Instead, users express performance constraints for each
                     workload, letting Quasar determine the right amount of
                     resources to meet these constraints at any point.
                     Second, Quasar uses classification techniques to
                     quickly and accurately determine the impact of the
                     amount of resources (scale-out and scale-up), type of
                     resources, and interference on performance for each
                     workload and dataset. Third, it uses the classification
                     results to jointly perform resource allocation and
                     assignment, quickly exploring the large space of
                     options for an efficient way to pack workloads on
                     available resources. Quasar monitors workload
                     performance and adjusts resource allocation and
                     assignment when needed. We evaluate Quasar over a wide
                     range of workload scenarios, including combinations of
                     distributed analytics frameworks and low-latency,
                     stateful services, both on a local cluster and a
                     cluster of dedicated EC2 servers. At steady state,
                     Quasar improves resource utilization by 47\% in the
                     200-server EC2 cluster, while meeting performance
                     constraints for workloads of all types.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Zahedi:2014:RRE,
author = "Seyed Majid Zahedi and Benjamin C. Lee",
title = "{REF}: resource elasticity fairness with sharing
incentives for multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "145--160",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541962",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the democratization of cloud and datacenter
computing, users increasingly share large hardware
platforms. In this setting, architects encounter two
challenges: sharing fairly and sharing multiple
resources. Drawing on economic game-theory, we rethink
fairness in computer architecture. A fair allocation
must provide sharing incentives (SI), envy-freeness
(EF), and Pareto efficiency (PE). We show that
Cobb--Douglas utility functions are well suited to
modeling user preferences for cache capacity and memory
bandwidth. And we present an allocation mechanism that
uses Cobb--Douglas preferences to determine each user's
fair share of the hardware. This mechanism provably
guarantees SI, EF, and PE, as well as
strategy-proofness in the large (SPL). And it does so
with modest performance penalties, less than 10\%
throughput loss, relative to an unfair mechanism.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Muthukaruppan:2014:PTB,
  author          = {Thannirmalai Somu Muthukaruppan and Anuj Pathania and
                     Tulika Mitra},
  title           = {Price theory based power management for heterogeneous
                     multi-cores},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {161--176},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541974},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Heterogeneous multi-cores that integrate cores with
                     different power performance characteristics are
                     promising alternatives to homogeneous systems in
                     energy- and thermally constrained environments.
                     However, the heterogeneity imposes significant
                     challenges to power-aware scheduling. We present a
                     price theory-based dynamic power management framework
                     for heterogeneous multi-cores that co-ordinates various
                     energy savings opportunities, such as dynamic
                     voltage/frequency scaling, load balancing, and task
                     migration in tandem, to achieve the best
                     power-performance characteristics. Unlike existing
                     centralized power management frameworks, ours is
                     distributed and hence scalable with minimal runtime
                     overhead. We design and implement the framework within
                     Linux operating system on ARM big.LITTLE heterogeneous
                     multi-core platform. Experimental evaluation confirms
                     the advantages of our approach compared to the
                     state-of-the-art techniques for power management in
                     heterogeneous multi-cores.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Wang:2014:UBP,
  author          = {Di Wang and Sriram Govindan and Anand Sivasubramaniam
                     and Aman Kansal and Jie Liu and Badriddine Khessib},
  title           = {Underprovisioning backup power infrastructure for
                     datacenters},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {177--192},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541966},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {While there has been prior work to underprovision the
                     power distribution infrastructure for a datacenter to
                     save costs, the ability to underprovision the backup
                     power infrastructure, which contributes significantly
                     to capital costs, is little explored. There are two
                     main components in the backup infrastructure --- Diesel
                     Generators (DGs) and UPS units --- which can both be
                     underprovisioned (or even removed) in terms of their
                     power and/or energy capacities. However, embarking on
                     such underprovisioning mandates studying several
                     ramifications --- the resulting cost savings, the lower
                     availability, and the performance and state loss
                     consequences on individual applications ---
                     concurrently. This paper presents the first such study,
                     considering cost, availability, performance and
                     application consequences of underprovisioning the
                     backup power infrastructure. We present a framework to
                     quantify the cost of backup capacity that is
                     provisioned, and implement techniques leveraging
                     existing software and hardware mechanisms to provide as
                     seamless an operation as possible for an application
                     within the provisioned backup capacity during a power
                     outage. We evaluate the cost-performance-availability
                     trade-offs for different levels of backup
                     underprovisioning for applications with diverse
                     reliance on the backup infrastructure. Our results show
                     that one may be able to completely do away with DGs,
                     compensating for it with additional UPS energy
                     capacities, to significantly cut costs and still be
                     able to handle power outages lasting as high as 40
                     minutes (which constitute bulk of the outages).
                     Further, we can push the limits of outage duration that
                     can be handled in a cost-effective manner, if
                     applications are willing to tolerate degraded
                     performance during the outage. Our evaluations also
                     show that different applications react differently to
                     the outage handling mechanisms, and that the efficacy
                     of the mechanisms is sensitive to the outage duration.
                     The insights from this paper can spur new opportunities
                     for future work on backup power infrastructure
                     optimization.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Yu:2014:CPR,
author = "Xiao Yu and Shi Han and Dongmei Zhang and Tao Xie",
title = "Comprehending performance from real-world execution
traces: a device-driver case",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "193--206",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541968",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Real-world execution traces record performance
problems that are likely perceived at deployment sites.
However, those problems can be rooted subtly and deeply
into system layers or other components far from the
place where delays are initially observed. To tackle
challenges of identifying deeply rooted problems, we
propose a new trace-based approach consisting of two
steps: impact analysis and causality analysis. The
impact analysis measures performance impacts on a
component basis, and the causality analysis discovers
patterns of runtime behaviors that are likely to cause
the measured impacts. The discovered patterns can help
performance analysts quickly identify root causes of
perceived performance problems. We instantiate our
approach to study the performance of device drivers on
over 19,500 real-world execution traces. The impact
analysis shows that device drivers constitute a
non-trivial part ($ \approx 38 \% $) in the overall
system performance, and a big part ($ \approx 26 \% $)
is due to interactions between drivers. The causality
analysis effectively discovers highly suspicious and
high-impact behavioral patterns in device drivers,
examined and confirmed by our automated evaluation,
developers, and performance analysts.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Arulraj:2014:LST,
  author          = {Joy Arulraj and Guoliang Jin and Shan Lu},
  title           = {Leveraging the short-term memory of hardware to
                     diagnose production-run software failures},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {1},
  pages           = {207--222},
  month           = mar,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2654822.2541973},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Thu Sep 4 07:12:13 MDT 2014},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Failures caused by software bugs are widespread in
                     production runs, causing severe losses for end users.
                     Unfortunately, diagnosing production-run failures is
                     challenging. Existing work cannot satisfy privacy,
                     run-time overhead, diagnosis capability, and diagnosis
                     latency requirements all at once. This paper designs a
                     low overhead, low latency, privacy preserving
                     production-run failure diagnosis system based on two
                     observations. First, short-term memory of program
                     execution is often sufficient for failure diagnosis, as
                     many bugs have short propagation distances. Second,
                     maintaining a short-term memory of execution is much
                     cheaper than maintaining a record of the whole
                     execution. Following these observations, we first
                     identify an existing hardware unit, Last Branch Record
                     (LBR), that records the last few taken branches to help
                     diagnose sequential bugs. We then propose a simple
                     hardware extension, Last Cache-coherence Record (LCR),
                     to record the last few cache accesses with specified
                     coherence states and hence help diagnose concurrency
                     bugs. Finally, we design LBRA and LCRA to automatically
                     locate failure root causes using LBR and LCR. Our
                     evaluation uses 31 real-world sequential and
                     concurrency bug failures from 18 representative
                     open-source software. The results show that with just
                     16 record entries, LBR and LCR enable our system to
                     automatically locate the root causes for 27 out of 31
                     failures, with less than 3\% run-time overhead. As our
                     system does not rely on sampling, \ldots{}},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS '14 conference proceedings.},
}
@Article{Honarmand:2014:RRR,
author = "Nima Honarmand and Josep Torrellas",
title = "{RelaxReplay}: record and replay for
relaxed-consistency multiprocessors",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "223--238",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541979",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Record and Deterministic Replay (RnR) of multithreaded
programs on relaxed-consistency multiprocessors has
been a long-standing problem. While there are designs
that work for Total Store Ordering (TSO), finding a
general solution that is able to record the access
reordering allowed by any relaxed-consistency model has
proved challenging. This paper presents the first
complete solution for hardware-assisted memory race
recording that works for any relaxed-consistency model
of current processors. With the scheme, called
RelaxReplay, we can build an RnR system for any
relaxed-consistency model and coherence protocol.
RelaxReplay's core innovation is a new way of capturing
memory access reordering. Each memory instruction goes
through a post-completion in-order counting step that
detects any reordering, and efficiently records it. We
evaluate RelaxReplay with simulations of an 8-core
release-consistent multicore running SPLASH-2 programs.
We observe that RelaxReplay induces negligible overhead
during recording. In addition, the average size of the
log produced is comparable to the log sizes reported
for existing solutions, and still very small compared
to the memory bandwidth of modern machines. Finally,
deterministic replay is efficient and needs minimal
hardware support.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Bucur:2014:PSE,
  author =       {Stefan Bucur and Johannes Kinder and George Candea},
  title =        {Prototyping symbolic execution engines for interpreted
                 languages},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {42},
  number =       {1},
  pages =        {239--254},
  month =        mar,
  year =         {2014},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/2654822.2541977},
  ISSN =         {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L =       {0163-5964},
  bibdate =      {Thu Sep 4 07:12:13 MDT 2014},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Symbolic execution is being successfully used to
                 automatically test statically compiled code. However,
                 increasingly more systems and applications are written
                 in dynamic interpreted languages like Python. Building
                 a new symbolic execution engine is a monumental effort,
                 and so is keeping it up-to-date as the target language
                 evolves. Furthermore, ambiguous language specifications
                 lead to their implementation in a symbolic execution
                 engine potentially differing from the production
                 interpreter in subtle ways. We address these challenges
                 by flipping the problem and using the interpreter
                 itself as a specification of the language semantics. We
                 present a recipe and tool (called Chef) for turning a
                 vanilla interpreter into a sound and complete symbolic
                 execution engine. Chef symbolically executes the target
                 program by symbolically executing the interpreter's
                 binary while exploiting inferred knowledge about the
                 program's high-level structure. Using Chef, we
                 developed a symbolic execution engine for Python in 5
                 person-days and one for Lua in 3 person-days. They
                 offer complete and faithful coverage of language
                 features in a way that keeps up with future language
                 versions at near-zero cost. Chef-produced engines are
                 up to 1000 times more performant than if directly
                 executing the interpreter symbolically without Chef.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  remark =       {ASPLOS '14 conference proceedings.},
}
@Article{Wu:2014:QAD,
author = "Lisa Wu and Andrea Lottarini and Timothy K. Paine and
Martha A. Kim and Kenneth A. Ross",
title = "{Q100}: the architecture and design of a database
processing unit",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "255--268",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541961",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In this paper, we propose Database Processing Units,
or DPUs, a class of domain-specific database processors
that can efficiently handle database applications. As a
proof of concept, we present the instruction set
architecture, microarchitecture, and hardware
implementation of one DPU, called Q100. The Q100 has a
collection of heterogeneous ASIC tiles that process
relational tables and columns quickly and
energy-efficiently. The architecture uses coarse
grained instructions that manipulate streams of data,
thereby maximizing pipeline and data parallelism, and
minimizing the need to time multiplex the accelerator
tiles and spill intermediate results to memory. This
work explores a Q100 design space of 150
configurations, selecting three for further analysis: a
small, power-conscious implementation, a
high-performance implementation, and a balanced design that
maximizes performance per Watt. We then demonstrate
that the power-conscious Q100 handles the TPC-H queries
with three orders of magnitude less energy than a state
of the art software DBMS, while the
performance-oriented design outperforms the same DBMS
by 70X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Chen:2014:DSF,
author = "Tianshi Chen and Zidong Du and Ninghui Sun and Jia
Wang and Chengyong Wu and Yunji Chen and Olivier
Temam",
title = "{DianNao}: a small-footprint high-throughput
accelerator for ubiquitous machine-learning",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "269--284",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541967",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Machine-Learning tasks are becoming pervasive in a
broad range of domains, and in a broad range of systems
(from embedded systems to data centers). At the same
time, a small set of machine-learning algorithms
(especially Convolutional and Deep Neural Networks,
i.e., CNNs and DNNs) are proving to be state-of-the-art
across many applications. As architectures evolve
towards heterogeneous multi-cores composed of a mix of
cores and accelerators, a machine-learning accelerator
can achieve the rare combination of efficiency (due to
the small number of target algorithms) and broad
application scope. Until now, most machine-learning
accelerator designs have focused on efficiently
implementing the computational part of the algorithms.
However, recent state-of-the-art CNNs and DNNs are
characterized by their large size. In this study, we
design an accelerator for large-scale CNNs and DNNs,
with a special emphasis on the impact of memory on
accelerator design, performance and energy. We show
that it is possible to design an accelerator with a
high throughput, capable of performing 452 GOP/s (key
NN operations such as synaptic weight multiplications
and neurons outputs additions) in a small footprint of
3.02 mm$^2$ and 485 mW; compared to a 128-bit 2GHz SIMD
processor, the accelerator is 117.87x faster, and it
can reduce the total energy by 21.08x. The accelerator
characteristics are obtained after layout at 65 nm.
Such a high throughput in a small footprint can open up
the usage of state-of-the-art machine-learning
algorithms in a broad set of systems and for a broad
set of applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Lin:2014:KMO,
author = "Felix Xiaozhu Lin and Zhen Wang and Lin Zhong",
title = "{K2}: a mobile operating system for heterogeneous
coherence domains",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "285--300",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541975",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Mobile System-on-Chips (SoC) that incorporate
heterogeneous coherence domains promise high energy
efficiency to a wide range of mobile applications, yet
are difficult to program. To exploit the architecture,
a desirable, yet missing capability is to replicate
operating system (OS) services over multiple coherence
domains with minimum inter-domain communication. In
designing such an OS, we set three goals: to ease
application development, to simplify OS engineering,
and to preserve the current OS performance. To this
end, we identify a shared-most OS model for multiple
coherence domains: creating per-domain instances of
core OS services with no shared state, while enabling
other extended OS services to share state across
domains. To test the model, we build K2, a prototype OS
on the TI OMAP4 SoC, by reusing most of the Linux 3.4
source. K2 presents a single system image to
applications with its two kernels running on top of the
two coherence domains of OMAP4. The two kernels have
independent instances of core OS services, such as page
allocator and interrupt management, as coordinated by
K2; the two kernels share most extended OS services,
such as device drivers, whose state is kept coherent
transparently by K2. Despite platform constraints and
unoptimized code, K2 improves energy efficiency for
light OS workloads by 8x--10x, while incurring less than
6\% performance overhead for a device driver shared
between kernels. Our experiences with K2 show that the
shared-most model is promising.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Menychtas:2014:DSF,
  author =       {Konstantinos Menychtas and Kai Shen and Michael L.
                 Scott},
  title =        {Disengaged scheduling for fair, protected access to
                 fast computational accelerators},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {42},
  number =       {1},
  pages =        {301--316},
  month =        mar,
  year =         {2014},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/2654822.2541963},
  ISSN =         {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L =       {0163-5964},
  bibdate =      {Thu Sep 4 07:12:13 MDT 2014},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Today's operating systems treat GPUs and other
                 computational accelerators as if they were simple
                 devices, with bounded and predictable response times.
                 With accelerators assuming an increasing share of the
                 workload on modern machines, this strategy is already
                 problematic, and likely to become untenable soon. If
                 the operating system is to enforce fair sharing of the
                 machine, it must assume responsibility for accelerator
                 scheduling and resource management. Fair, safe
                 scheduling is a particular challenge on fast
                 accelerators, which allow applications to avoid
                 kernel-crossing overhead by interacting directly with
                 the device. We propose a disengaged scheduling strategy
                 in which the kernel intercedes between applications and
                 the accelerator on an infrequent basis, to monitor
                 their use of accelerator cycles and to determine which
                 applications should be granted access over the next
                 time interval. Our strategy assumes a well defined,
                 narrow interface exported by the accelerator. We build
                 upon such an interface, systematically inferred for the
                 latest Nvidia GPUs. We construct several example
                 schedulers, including Disengaged Timeslice with overuse
                 control that guarantees fairness and Disengaged Fair
                 Queueing that is effective in limiting resource
                 idleness, but probabilistic. Both schedulers ensure
                 fair sharing of the GPU, even among uncooperative or
                 adversarial applications; Disengaged Fair Queueing
                 incurs a 4\% overhead on average (max 18\%) compared to
                 direct device access across our evaluation scenarios.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  remark =       {ASPLOS '14 conference proceedings.},
}
@Article{Gehlhaar:2014:NPN,
author = "Jeff Gehlhaar",
title = "Neuromorphic processing: a new frontier in scaling
computer architecture",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "317--318",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2564710",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The desire to build a computer that operates in the
same manner as our brains is as old as the computer
itself. Although computer engineering has made great
strides in hardware performance as a result of Dennard
scaling, and even great advances in `brain like'
computation, the field still struggles to move beyond
sequential, analytical computing architectures.
Neuromorphic systems are being developed to transcend
the barriers imposed by silicon power consumption,
develop new algorithms that help machines achieve
cognitive behaviors, and both exploit and enable
further research in neuroscience. In this talk I will
discuss a system implementing spiking neural networks.
These systems hold the promise of an architecture that
is event based, broad and shallow, and thus more power
efficient than conventional computing solutions. This
new approach to computation based on modeling the brain
and its simple but highly connected units presents a
host of new challenges. Hardware faces tradeoffs such
as density or lower power at the cost of high
interconnection overhead. Consequently, software
systems must face choices about new language design.
Highly distributed hardware systems require complex
place and route algorithms to distribute the execution
of the neural network across a large number of highly
interconnected processing units. Finally, the overall
design, simulation and testing process has to be
entirely reimagined. We discuss these issues in the
context of the Zeroth processor and how this approach
compares to other neuromorphic systems that are
becoming available.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Sani:2014:PDF,
author = "Ardalan Amiri Sani and Kevin Boos and Shaopu Qin and
Lin Zhong",
title = "{I/O} paravirtualization at the device file boundary",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "319--332",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541943",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Paravirtualization is an important I/O virtualization
technology since it uniquely provides all of the
following benefits: the ability to share the device
between multiple VMs, support for legacy devices
without virtualization hardware, and high performance.
However, existing paravirtualization solutions have one
main limitation: they only support one I/O device
class, and would require significant engineering effort
to support new device classes and features. In this
paper, we present Paradice, a solution that vastly
simplifies I/O paravirtualization by using a common
paravirtualization boundary for various I/O device
classes: Unix device files. Using this boundary, the
paravirtual drivers simply act as a class-agnostic
indirection layer between the application and the
actual device driver. We address two fundamental
challenges: supporting cross-VM driver memory
operations without changes to applications or device
drivers and providing fault and device data isolation
between guest VMs despite device driver bugs. We
implement Paradice for x86, the Xen hypervisor, and the
Linux and FreeBSD OSes. Our implementation
paravirtualizes various GPUs, input devices, cameras,
an audio device, and an Ethernet card for the netmap
framework with $\sim$7700 LoC, of which only $\sim$900 are device
class-specific. Our measurements show that Paradice
achieves performance close to native for different
devices and applications including netmap, 3D HD games,
and OpenCL applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Dall:2014:KAD,
author = "Christoffer Dall and Jason Nieh",
title = "{KVM\slash ARM}: the design and implementation of
the {Linux ARM} hypervisor",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "333--348",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541946",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As ARM CPUs become increasingly common in mobile
devices and servers, there is a growing demand for
providing the benefits of virtualization for ARM-based
devices. We present our experiences building the Linux
ARM hypervisor, KVM/ARM, the first full system ARM
virtualization solution that can run unmodified guest
operating systems on ARM multicore hardware. KVM/ARM
introduces split-mode virtualization, allowing a
hypervisor to split its execution across CPU modes and
be integrated into the Linux kernel. This allows
KVM/ARM to leverage existing Linux hardware support and
functionality to simplify hypervisor development and
maintainability while utilizing recent ARM hardware
virtualization extensions to run virtual machines with
comparable performance to native execution. KVM/ARM has
been successfully merged into the mainline Linux
kernel, ensuring that it will gain wide adoption as the
virtualization platform of choice for ARM. We provide
the first measurements on real hardware of a complete
hypervisor using ARM hardware virtualization support.
Our results demonstrate that KVM/ARM has modest
virtualization performance and power costs, and can
achieve lower performance and power costs compared to
x86-based Linux virtualization on multicore hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Amit:2014:VMS,
  author =       {Nadav Amit and Dan Tsafrir and Assaf Schuster},
  title =        {{VSwapper}: a memory swapper for virtualized
                 environments},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {42},
  number =       {1},
  pages =        {349--366},
  month =        mar,
  year =         {2014},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/2654822.2541969},
  ISSN =         {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L =       {0163-5964},
  bibdate =      {Thu Sep 4 07:12:13 MDT 2014},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {The number of guest virtual machines that can be
                 consolidated on one physical host is typically limited
                 by the memory size, motivating memory overcommitment.
                 Guests are given a choice to either install a
                 ``balloon'' driver to coordinate the overcommitment
                 activity, or to experience degraded performance due to
                 uncooperative swapping. Ballooning, however, is not a
                 complete solution, as hosts must still fall back on
                 uncooperative swapping in various circumstances.
                 Additionally, ballooning takes time to accommodate
                 change, and so guests might experience degraded
                 performance under changing conditions. Our goal is to
                 improve the performance of hosts when they fall back on
                 uncooperative swapping and/or operate under changing
                 load conditions. We carefully isolate and characterize
                 the causes for the associated poor performance, which
                 include various types of superfluous swap operations,
                 decayed swap file sequentiality, and ineffective
                 prefetch decisions upon page faults. We address these
                 problems by implementing VSwapper, a guest-agnostic
                 memory swapper for virtual environments that allows
                 efficient, uncooperative overcommitment. With inactive
                 ballooning, VSwapper yields up to an order of magnitude
                 performance improvement. Combined with ballooning,
                 VSwapper can achieve up to double the performance under
                 changing load conditions.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  remark =       {ASPLOS '14 conference proceedings.},
}
@Article{Andrus:2014:CNE,
author = "Jeremy Andrus and Alexander {Van't Hof} and Naser
AlDuaij and Christoffer Dall and Nicolas Viennot and
Jason Nieh",
title = "{Cider}: native execution of {iOS} apps on {Android}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "367--382",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541972",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present Cider, an operating system compatibility
architecture that can run applications built for
different mobile ecosystems, iOS or Android, together
on the same smartphone or tablet. Cider enhances the
domestic operating system, Android, of a device with
kernel-managed, per-thread personas to mimic the
application binary interface of a foreign operating
system, iOS, enabling it to run unmodified foreign
binaries. This is accomplished using a novel
combination of binary compatibility techniques
including two new mechanisms: compile-time code
adaptation, and diplomatic functions. Compile-time code
adaptation enables existing unmodified foreign source
code to be reused in the domestic kernel, reducing
implementation effort required to support multiple
binary interfaces for executing domestic and foreign
applications. Diplomatic functions leverage per-thread
personas, and allow foreign applications to use
domestic libraries to access proprietary software and
hardware interfaces. We have built a Cider prototype,
and demonstrate that it imposes modest performance
overhead and runs unmodified iOS and Android
applications together on a Google Nexus tablet running
the latest version of Android.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Litz:2014:STR,
author = "Heiner Litz and David Cheriton and Amin Firoozshahian
and Omid Azizi and John P. Stevenson",
title = "{SI-TM}: reducing transactional memory abort rates
through snapshot isolation",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "383--398",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541952",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Transactional memory represents an attractive
conceptual model for programming concurrent
applications. Unfortunately, high transaction abort
rates can cause significant performance degradation.
Conventional transactional memory realizations not only
pessimistically abort transactions on every read-write
conflict but also because of false sharing, cache
evictions, TLB misses, page faults and interrupts.
Consequently, the use of transactions needs to be
restricted to a very small number of operations to
achieve predictable performance, thereby, limiting its
benefit to programming simplification. In this paper,
we investigate snapshot isolation transactional memory
in which transactions operate on memory snapshots that
always guarantee consistent reads. By exploiting
snapshots, an established database model of
transactions, transactions can ignore read-write
conflicts and only need to abort on write-write
conflicts. Our implementation utilizes a memory
controller that supports multiversion memory, to
efficiently support snapshotting in hardware. We show
that snapshot isolation can reduce the number of aborts
in some cases by three orders of magnitude and improve
performance by up to 20x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Ruan:2014:TLC,
  author =       {Wenjia Ruan and Trilok Vyas and Yujie Liu and Michael
                 Spear},
  title =        {Transactionalizing legacy code: an experience report
                 using {GCC} and {Memcached}},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {42},
  number =       {1},
  pages =        {399--412},
  month =        mar,
  year =         {2014},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/2654822.2541960},
  ISSN =         {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L =       {0163-5964},
  bibdate =      {Thu Sep 4 07:12:13 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {The addition of transactional memory (TM) support to
                 existing languages provides the opportunity to create
                 new software from scratch using transactions, and also
                 to simplify or extend legacy code by replacing existing
                 synchronization with language-level transactions. In
                 this paper, we describe our experiences
                 transactionalizing the memcached application through
                 the use of the GCC implementation of the Draft C++ TM
                 Specification. We present experiences and
                 recommendations that we hope will guide the effort to
                 integrate TM into languages, and that may also
                 contribute to the growing collective knowledge about
                 how programmers can begin to exploit TM in existing
                 production-quality software.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  remark =       {ASPLOS '14 conference proceedings.},
}
@Article{Morrison:2014:FFW,
author = "Adam Morrison and Yehuda Afek",
title = "Fence-free work stealing on bounded {TSO} processors",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "413--426",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541987",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Work stealing is the method of choice for load
balancing in task parallel programming languages and
frameworks. Yet despite considerable effort invested in
optimizing work stealing task queues, existing
algorithms issue a costly memory fence when removing a
task, and these fences are believed to be necessary for
correctness. This paper refutes this belief,
demonstrating work stealing algorithms in which a
worker does not issue a memory fence for
microarchitectures with a bounded total store ordering
(TSO) memory model. Bounded TSO is a novel restriction
of TSO --- capturing mainstream x86 and SPARC TSO
processors --- that bounds the number of stores a load
can be reordered with. Our algorithms eliminate the
memory fence penalty, improving the running time of a
suite of parallel benchmarks on modern x86 multicore
processors by 7\%--11\% on average (and up to 23\%),
compared to the Cilk and Chase--Lev work stealing
queues.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Hower:2014:HRF,
  author =       {Derek R. Hower and Blake A. Hechtman and Bradford M.
                 Beckmann and Benedict R. Gaster and Mark D. Hill and
                 Steven K. Reinhardt and David A. Wood},
  title =        {Heterogeneous-race-free memory models},
  journal =      j-COMP-ARCH-NEWS,
  volume =       {42},
  number =       {1},
  pages =        {427--440},
  month =        mar,
  year =         {2014},
  CODEN =        {CANED2},
  DOI =          {https://doi.org/10.1145/2654822.2541981},
  ISSN =         {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L =       {0163-5964},
  bibdate =      {Thu Sep 4 07:12:13 MDT 2014},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract =     {Commodity heterogeneous systems (e.g., integrated CPUs
                 and GPUs), now support a unified, shared memory address
                 space for all components. Because the latency of global
                 communication in a heterogeneous system can be
                 prohibitively high, heterogeneous systems (unlike
                 homogeneous CPU systems) provide synchronization
                 mechanisms that only guarantee ordering among a subset
                 of threads, which we call a scope. Unfortunately, the
                 consequences and semantics of these scoped operations
                 are not yet well understood. Without a formal and
                 approachable model to reason about the behavior of
                 these operations, we risk an array of portability and
                 performance issues. In this paper, we embrace scoped
                 synchronization with a new class of memory consistency
                 models that add scoped synchronization to
                 data-race-free models like those of C++ and Java.
                 Called sequential consistency for
                 heterogeneous-race-free (SC for HRF), the new models
                 guarantee SC for programs with ``sufficient''
                 synchronization (no data races) of ``sufficient''
                 scope. We discuss two such models. The first,
                 HRF-direct, works well for programs with highly regular
                 parallelism. The second, HRF-indirect, builds on
                 HRF-direct by allowing synchronization using different
                 scopes in some cases involving transitive
                 communication. We quantitatively show that HRF-indirect
                 encourages forward-looking programs with irregular
                 parallelism by showing up to a 10\% performance
                 increase in a task runtime for GPUs.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGARCH Computer Architecture News},
  journal-URL =  {https://dl.acm.org/loi/sigarch},
  remark =       {ASPLOS '14 conference proceedings.},
}
@Article{Jung:2014:TNS,
author = "Myoungsoo Jung and Wonil Choi and John Shalf and
Mahmut Taylan Kandemir",
title = "{Triple-A}: a Non-{SSD} based autonomic all-flash
array for high performance storage systems",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "441--454",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541953",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Solid State Disk (SSD) arrays are in a position to (at
least partially) replace spinning disk arrays in high
performance computing (HPC) systems due to their better
performance and lower power consumption. However, these
emerging SSD arrays are facing enormous challenges,
which are not observed in disk-based arrays.
Specifically, we observe that the performance of SSD
arrays can significantly degrade due to various
array-level resource contentions. In addition, their
maintenance costs exponentially increase over time,
which renders them difficult to deploy widely in HPC
systems. To address these challenges, we propose
Triple-A, a non-SSD based Autonomic All-Flash Array,
which is a self-optimizing, from-scratch NAND flash
cluster. Triple-A can detect two different types of
resource contentions and autonomically alleviate them
by reshaping the physical data-layout on its flash
array network. Our experimental evaluation using both
real workloads and a micro-benchmark show that Triple-A
can offer a 53\% higher sustained throughput and a 80\%
lower I/O latency than non-autonomic SSD arrays.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Liu:2014:NDU,
author = "Ren-Shuo Liu and De-Yu Shen and Chia-Lin Yang and
Shun-Chih Yu and Cheng-Yuan Michael Wang",
title = "{NVM Duet}: unified working memory and persistent
store architecture",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "455--470",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541957",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging non-volatile memory (NVM) technologies have
gained a lot of attention recently. The
byte-addressability and high density of NVM enable
computer architects to build large-scale main memory
systems. NVM has also been shown to be a promising
alternative to conventional persistent store. With NVM,
programmers can persistently retain in-memory data
structures without writing them to disk. Therefore, one
can envision that in the future, NVM will play the role
of both working memory and persistent store at the same
time. Persistent store demands consistency and
durability guarantees, thereby imposing new design
constraints on the memory system. Consistency is
achieved at the expense of serializing multiple write
operations. Durability requires memory cells to
guarantee non-volatility and thus reduces the write
speed. Therefore, a unified architecture oblivious to
these two use cases would lead to suboptimal design. In
this paper, we propose a novel unified working memory
and persistent store architecture, NVM Duet, which
provides the required consistency and durability
guarantees for persistent store while relaxing these
constraints if accesses to NVM are for working memory.
A cross-layer design approach is adopted to achieve the
design goal. Overall, simulation results demonstrate
that NVM Duet achieves up to 1.68x (1.32x on average)
speedup compared with the baseline design.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Ouyang:2014:SSD,
  author =       "Jian Ouyang and Shiding Lin and Song Jiang and Zhenyu
                 Hou and Yong Wang and Yuanzheng Wang",
  title =        "{SDF}: software-defined flash for {Web}-scale
                 {Internet} storage systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "471--484",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541959",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In the last several years hundreds of thousands of
                 SSDs have been deployed in the data centers of Baidu,
                 China's largest Internet search company. Currently only
                 40\% or less of the raw bandwidth of the flash memory
                 in the SSDs is delivered by the storage system to the
                 applications. Moreover, because of space
                 over-provisioning in the SSD to accommodate
                 non-sequential or random writes, and additionally,
                 parity coding across flash channels, typically only
                 50--70\% of the raw capacity of a commodity SSD can be
                 used for user data. Given the large scale of Baidu's
                 data center, making the most effective use of its SSDs
                 is of great importance. Specifically, we seek to
                 maximize both bandwidth and usable capacity. To achieve
                 this goal we propose {\em software-defined flash}
                 (SDF), a hardware/software co-designed storage system
                 to maximally exploit the performance characteristics of
                 flash memory in the context of our workloads. SDF
                 exposes individual flash channels to the host software
                 and eliminates space over-provisioning. The host
                 software, given direct access to the raw flash channels
                 of the SSD, can effectively organize its data and
                 schedule its data access to better realize the SSD's
                 raw performance potential. Currently more than 3000
                 SDFs have been deployed in Baidu's storage system that
                 supports its web page and image repository services.
                 Our measurements show that SDF can deliver
                 approximately 95\% of the raw flash bandwidth and
                 provide 99\% of the flash capacity for user data. SDF
                 increases I/O bandwidth by 300\% and reduces per-GB
                 hardware cost by 50\% on average compared with the
                 commodity-SSD-based system used at Baidu.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Gutierrez:2014:ISS,
  author =       "Anthony Gutierrez and Michael Cieslak and Bharan
                 Giridhar and Ronald G. Dreslinski and Luis Ceze and
                 Trevor Mudge",
  title =        "Integrated {$3$D}-stacked server designs for
                 increasing physical density of key-value stores",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "485--498",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541951",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Key-value stores, such as Memcached, have been used to
                 scale web services since the beginning of the Web 2.0
                 era. Data center real estate is expensive, and several
                 industry experts we have spoken to have suggested that
                 a significant portion of their data center space is
                 devoted to key value stores. Despite its wide-spread
                 use, there is little in the way of hardware
                 specialization for increasing the efficiency and
                 density of Memcached; it is currently deployed on
                 commodity servers that contain high-end CPUs designed
                 to extract as much instruction-level parallelism as
                 possible. Out-of-order CPUs, however have been shown to
                 be inefficient when running Memcached. To address
                 Memcached efficiency issues, we propose two
                 architectures using 3D stacking to increase data
                 storage efficiency. Our first 3D architecture, Mercury,
                 consists of stacks of ARM Cortex-A7 cores with 4GB of
                 DRAM, as well as NICs. Our second architecture,
                 Iridium, replaces DRAM with NAND Flash to improve
                 density. We explore, through simulation, the potential
                 efficiency benefits of running Memcached on servers
                 that use 3D-stacking to closely integrate low-power
                 CPUs with NICs and memory. With Mercury we demonstrate
                 that density may be improved by 2.9X, power efficiency
                 by 4.9X, throughput by 10X, and throughput per GB by
                 3.5X over a state-of-the-art server running optimized
                 Memcached. With Iridium we show that density may be
                 increased by 14X, power efficiency by 2.4X, and
                 throughput by 5.2X, while still meeting latency
                 requirements for a majority of requests.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Nguyen:2014:DGD,
  author =       "Donald Nguyen and Andrew Lenharth and Keshav Pingali",
  title =        "Deterministic {Galois}: on-demand, portable and
                 parameterless",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "499--512",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541964",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Non-determinism in program execution can make program
                 development and debugging difficult. In this paper, we
                 argue that solutions to this problem should be
                 on-demand, portable and parameterless. On-demand means
                 that the programming model should permit the writing of
                 non-deterministic programs since these programs often
                 perform better than deterministic ones for the same
                 problem. Portable means that the program should produce
                 the same answer even if it is run on different
                 machines. Parameterless means that if there are
                 machine-dependent scheduling parameters that must be
                 tuned for good performance, they must not affect the
                 output. Although many solutions for deterministic
                 program execution have been proposed in the literature,
                 they fall short along one or more of these dimensions.
                 To remedy this, we propose a new approach, based on the
                 Galois programming model, in which (i) the programming
                 model permits the writing of non-deterministic programs
                 and (ii) the runtime system executes these programs
                 deterministically if needed. Evaluation of this
                 approach on a collection of benchmarks from the PARSEC,
                 PBBS, and Lonestar suites shows that it delivers
                 deterministic execution with substantially less
                 overhead than other systems in the literature.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Ribic:2014:EEW,
  author =       "Haris Ribic and Yu David Liu",
  title =        "Energy-efficient work-stealing language runtimes",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "513--528",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541971",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Work stealing is a promising approach to constructing
                 multithreaded program runtimes of parallel programming
                 languages. This paper presents HERMES, an
                 energy-efficient work-stealing language runtime. The
                 key insight is that threads in a work-stealing
                 environment --- thieves and victims --- have varying
                 impacts on the overall program running time, and a
                 coordination of their execution ``tempo'' can lead to
                 energy efficiency with minimal performance loss. The
                 centerpiece of HERMES is two complementary algorithms
                 to coordinate thread tempo: the workpath-sensitive
                 algorithm determines tempo for each thread based on
                 thief-victim relationships on the execution path,
                 whereas the workload-sensitive algorithm selects
                 appropriate tempo based on the size of work-stealing
                 deques. We construct HERMES on top of Intel Cilk Plus's
                 runtime, and implement tempo adjustment through
                 standard Dynamic Voltage and Frequency Scaling (DVFS).
                 Benchmarks running on HERMES demonstrate an average of
                 11--12\% energy savings with an average of 3--4\%
                 performance loss through meter-based measurements over
                 commercial CPUs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Mytkowicz:2014:DPF,
  author =       "Todd Mytkowicz and Madanlal Musuvathi and Wolfram
                 Schulte",
  title =        "Data-parallel finite-state machines",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "529--542",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541988",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "A finite-state machine (FSM) is an important
                 abstraction for solving several problems, including
                 regular-expression matching, tokenizing text, and
                 Huffman decoding. FSM computations typically involve
                 data-dependent iterations with unpredictable
                 memory-access patterns making them difficult to
                 parallelize. This paper describes a parallel algorithm
                 for FSMs that breaks dependences across iterations by
                 efficiently enumerating transitions from all possible
                 states on each input symbol. This allows the algorithm
                 to utilize various sources of data parallelism
                 available on modern hardware, including vector
                 instructions and multiple processors/cores. For
                 instance, on benchmarks from three FSM applications:
                 regular expressions, Huffman decoding, and HTML
                 tokenization, the parallel algorithm achieves up to a
                 3x speedup over optimized sequential baselines on a
                 single core, and linear speedups up to 21x on 8
                 cores.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Zhao:2014:CES,
  author =       "Zhijia Zhao and Bo Wu and Xipeng Shen",
  title =        "Challenging the {``embarrassingly sequential''}:
                 parallelizing finite state machine-based computations
                 through principled speculation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "543--558",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541989",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Finite-State Machine (FSM) applications are important
                 for many domains. But FSM computation is inherently
                 sequential, making such applications notoriously
                 difficult to parallelize. Most prior methods address
                 the problem through speculations on simple heuristics,
                 offering limited applicability and inconsistent
                 speedups. This paper provides some principled
                 understanding of FSM parallelization, and offers the
                 first disciplined way to exploit application-specific
                 information to inform speculations for parallelization.
                 Through a series of rigorous analysis, it presents a
                 probabilistic model that captures the relations between
                 speculative executions and the properties of the target
                 FSM and its inputs. With the formulation, it proposes
                 two model-based speculation schemes that automatically
                 customize themselves with the suitable configurations
                 to maximize the parallelization benefits. This rigorous
                 treatment yields near-linear speedup on applications
                 that state-of-the-art techniques can barely
                 accelerate.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Zhou:2014:SAS,
  author =       "Yanqi Zhou and David Wentzlaff",
  title =        "The sharing architecture: sub-core configurability for
                 {IaaS} clouds",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "559--574",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541950",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Businesses and Academics are increasingly turning to
                 Infrastructure as a Service (IaaS) Clouds such as
                 Amazon's Elastic Compute Cloud (EC2) to fulfill their
                 computing needs. Unfortunately, current IaaS systems
                 provide a severely restricted pallet of rentable
                 computing options which do not optimally fit the
                 workloads that they are executing. We address this
                 challenge by proposing and evaluating a manycore
                 architecture, called the Sharing Architecture,
                 specifically optimized for IaaS systems by being
                 reconfigurable on a sub-core basis. The Sharing
                 Architecture enables better matching of workload to
                 micro-architecture resources by replacing static cores
                 with Virtual Cores which can be dynamically
                 reconfigured to have different numbers of ALUs and
                 amount of Cache. This reconfigurability enables many of
                 the same benefits of heterogeneous multicores, but in a
                 homogeneous fabric, and enables the reuse and resale of
                 resources on a per ALU or per KB of cache basis. The
                 Sharing Architecture leverages Distributed ILP
                 techniques, but is designed in a way to be independent
                 of recompilation. In addition, we introduce an economic
                 model which is enabled by the Sharing Architecture and
                 show how different users who have varying needs can be
                 better served by such a flexible architecture. We
                 evaluate the Sharing Architecture across a benchmark
                 suite of Apache, SPECint, and parts of PARSEC, and find
                 that it can achieve up to a 5x more economically
                 efficient market when compared to static architecture
                 multicores. We implemented the Sharing Architecture in
                 Verilog and present area overhead results.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Waterland:2014:AAS,
  author =       "Amos Waterland and Elaine Angelino and Ryan P. Adams
                 and Jonathan Appavoo and Margo Seltzer",
  title =        "{ASC}: automatically scalable computation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "575--590",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541985",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We present an architecture designed to transparently
                 and automatically scale the performance of sequential
                 programs as a function of the hardware resources
                 available. The architecture is predicated on a model of
                 computation that views program execution as a walk
                 through the enormous state space composed of the memory
                 and registers of a single-threaded processor. Each
                 instruction execution in this model moves the system
                 from its current point in state space to a
                 deterministic subsequent point. We can parallelize such
                 execution by predictively partitioning the complete
                 path and speculatively executing each partition in
                 parallel. Accurately partitioning the path is a
                 challenging prediction problem. We have implemented our
                 system using a functional simulator that emulates the
                 x86 instruction set, including a collection of state
                 predictors and a mechanism for speculatively executing
                 threads that explore potential states along the
                 execution path. While the overhead of our simulation
                 makes it impractical to measure speedup relative to
                 native x86 execution, experiments on three benchmarks
                 show scalability of up to a factor of 256 on a 1024
                 core machine when executing unmodified sequential
                 programs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Eyerman:2014:BSM,
  author =       "Stijn Eyerman and Lieven Eeckhout",
  title =        "The benefit of {SMT} in the multi-core era:
                 flexibility towards degrees of thread-level
                 parallelism",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "591--606",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541954",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The number of active threads in a multi-core processor
                 varies over time and is often much smaller than the
                 number of supported hardware threads. This requires
                 multi-core chip designs to balance core count and
                 per-core performance. Low active thread counts benefit
                 from a few big, high-performance cores, while high
                 active thread counts benefit more from a sea of small,
                 energy-efficient cores. This paper comprehensively
                 studies the trade-offs in multi-core design given
                 dynamically varying active thread counts. We find that,
                 under these workload conditions, a homogeneous
                 multi-core processor, consisting of a few
                 high-performance SMT cores, typically outperforms
                 heterogeneous multi-cores consisting of a mix of big
                 and small cores (without SMT), within the same power
                 budget. We also show that a homogeneous multi-core
                 performs almost as well as a heterogeneous multi-core
                 that also implements SMT, as well as a dynamic
                 multi-core, while being less complex to design and
                 verify. Further, heterogeneous multi-cores that
                 power-gate idle cores yield (only) slightly better
                 energy-efficiency compared to homogeneous multi-cores.
                 The overall conclusion is that the benefit of SMT in
                 the multi-core era is to provide flexibility with
                 respect to the available thread-level parallelism.
                 Consequently, homogeneous multi-cores with big SMT
                 cores are competitive high-performance,
                 energy-efficient design points for workloads with
                 dynamically varying active thread counts.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Ding:2014:FLE,
  author =       "Yufei Ding and Mingzhou Zhou and Zhijia Zhao and Sarah
                 Eisenstat and Xipeng Shen",
  title =        "Finding the limit: examining the potential and
                 complexity of compilation scheduling for {JIT}-based
                 runtime systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "607--622",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541945",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This work aims to find out the full potential of
                 compilation scheduling for JIT-based runtime systems.
                 Compilation scheduling determines the order in which
                 the compilation units (e.g., functions) in a program
                 are to be compiled or recompiled. It decides when what
                 versions of the units are ready to run, and hence
                 affects performance. But it has been a largely
                 overlooked direction in JIT-related research, with some
                 fundamental questions left open: How significant
                 compilation scheduling is for performance, how good the
                 scheduling schemes employed by existing runtime systems
                 are, and whether a great potential exists for
                 improvement. This study proves the strong
                 NP-completeness of the problem, proposes a heuristic
                 algorithm that yields near optimal schedules, examines
                 the potential of two current scheduling schemes
                 empirically, and explores the relations with JIT
                 designs. It provides the first principled understanding
                 to the complexity and potential of compilation
                 scheduling, shedding some insights for JIT-based
                 runtime system improvement.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Lupon:2014:SHS,
  author =       "Marc Lupon and Enric Gibert and Grigorios Magklis and
                 Sridhar Samudrala and Ra{\'u}l Mart{\'\i}nez and
                 Kyriakos Stavrou and David R. Ditzel",
  title =        "Speculative hardware\slash software co-designed
                 floating-point multiply-add fusion",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "623--638",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541978",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "A Fused Multiply-Add (FMA) instruction is currently
                 available in many general-purpose processors. It
                 increases performance by reducing latency of dependent
                 operations and increases precision by computing the
                 result as an indivisible operation with no intermediate
                 rounding. However, since the arithmetic behavior of a
                 single-rounding FMA operation is different than
                 independent FP multiply followed by FP add
                 instructions, some algorithms require significant
                 revalidation and rewriting efforts to work as expected
                 when they are compiled to operate with FMA --- a cost
                 that developers may not be willing to pay. Because of
                 that, abundant legacy applications are not able to
                 utilize FMA instructions. In this paper we propose a
                 novel HW/SW collaborative technique that is able to
                 efficiently execute workloads with increased
                 utilization of FMA, by adding the option to get the
                 same numerical result as separate FP multiply and FP
                 add pairs. In particular, we extended the host ISA of a
                 HW/SW co-designed processor with a new Combined
                 Multiply-Add (CMA) instruction that performs an FMA
                 operation with an intermediate rounding. This new
                 instruction is used by a transparent dynamic
                 translation software layer that uses a speculative
                 instruction-fusion optimization to transform FP
                 multiply and FP add sequences into CMA instructions.
                 The FMA unit has been slightly modified to support both
                 single-rounding and double-rounding fused instructions
                 without increasing their latency and to provide a
                 conservative fall-back path in case of misspeculation.
                 Evaluation on a cycle-accurate timing simulator showed
                 that CMA improved SPECfp performance by 6.3\% and
                 reduced executed instructions by 4.7\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Schulte:2014:PCS,
  author =       "Eric Schulte and Jonathan Dorn and Stephen Harding and
                 Stephanie Forrest and Westley Weimer",
  title =        "Post-compiler software optimization for reducing
                 energy",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "639--652",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541980",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Modern compilers typically optimize for executable
                 size and speed, rarely exploring non-functional
                 properties such as power efficiency. These properties
                 are often hardware-specific, time-intensive to
                 optimize, and may not be amenable to standard dataflow
                 optimizations. We present a general post-compilation
                 approach called Genetic Optimization Algorithm (GOA),
                 which targets measurable non-functional aspects of
                 software execution in programs that compile to x86
                 assembly. GOA combines insights from profile-guided
                 optimization, superoptimization, evolutionary
                 computation and mutational robustness. GOA searches for
                 program variants that retain required functional
                 behavior while improving non-functional behavior, using
                 characteristic workloads and predictive modeling to
                 guide the search. The resulting optimizations are
                 validated using physical performance measurements and a
                 larger held-out test suite. Our experimental results on
                 PARSEC benchmark programs show average energy
                 reductions of 20\%, both for a large AMD system and a
                 small Intel system, while maintaining program
                 functionality on target workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Wood:2014:RSA,
  author =       "David A. Wood",
  title =        "Resolved: specialized architectures, languages, and
                 system software should supplant general-purpose
                 alternatives within a decade",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "653--654",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2563369",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The field of computing has struggled since its
                 inception with the tension between specialization and
                 generalization. Specialized architectures, programming
                 languages, and system software promise better
                 performance (across many metrics, including efficiency,
                 productivity, etc.) for workloads that match their
                 specialization objective. General-purpose
                 architectures, languages, and system software sacrifice
                 extremes of performance for specific workloads, seeking
                 acceptable performance across a much wider range. While
                 specialized alternatives have always had their place,
                 general-purpose architectures, languages, and system
                 software have dominated main-stream computing systems
                 for the past several decades. But with Dennard scaling
                 already gone and the end of Moore's Law looming, some
                 have argued that general-purpose computing platforms
                 must naturally give way to specialization. In this
                 debate, two teams of highly-opinionated experts will
                 debate the proposition that specialized architectures,
                 languages, and system software should largely supplant
                 general-purpose alternatives within the next decade.
                 Arguments in favor of specialization include energy
                 efficiency in the post-Dennard scaling era, performance
                 scaling in the post-Moore's law era, and improvements
                 in programmer productivity. Arguments against include
                 the large investment needed to create specialized
                 hardware and software components, lack of tools and
                 interfaces to create reusable components, the semantic
                 gap from overspecialization, and security
                 vulnerabilities and general correctness issues due to
                 interoperation of specialized components.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Ruwase:2014:GHF,
  author =       "Olatunji Ruwase and Michael A. Kozuch and Phillip B.
                 Gibbons and Todd C. Mowry",
  title =        "{Guardrail}: a high fidelity approach to protecting
                 hardware devices from buggy drivers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "655--670",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541970",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Device drivers are an Achilles' heel of modern
                 commodity operating systems, accounting for far too
                 many system failures. Previous work on driver
                 reliability has focused on protecting the kernel from
                 unsafe driver side-effects by interposing an
                 invariant-checking layer at the driver interface, but
                 otherwise treating the driver as a black box. In this
                 paper, we propose and evaluate Guardrail, which is a
                 more powerful framework for run-time driver analysis
                 that performs decoupled instruction-grain dynamic
                 correctness checking on arbitrary kernel-mode drivers
                 as they execute, thereby enabling the system to detect
                 and mitigate more challenging correctness bugs (e.g.,
                 data races, uninitialized memory accesses) that cannot
                 be detected by today's fault isolation techniques. Our
                 evaluation of Guardrail shows that it can find serious
                 data races, memory faults, and DMA faults in native
                 Linux drivers that required fixes, including previously
                 unknown bugs. Also, with hardware logging support,
                 Guardrail can be used for online protection of
                 persistent device state from driver bugs with at most
                 10\% overhead on the end-to-end performance of most
                 standard I/O workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Wood:2014:LLD,
  author =       "Benjamin P. Wood and Luis Ceze and Dan Grossman",
  title =        "Low-level detection of language-level data races with
                 {LARD}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "671--686",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541955",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Researchers have proposed always-on data-race
                 exceptions as a way to avoid the ill effects of data
                 races, but slow performance of accurate dynamic
                 data-race detection remains a barrier to the adoption
                 of always-on data-race exceptions. Proposals for
                 accurate low-level (e.g., hardware) data-race detection
                 have the potential to reduce this performance barrier.
                 This paper explains why low-level data-race detectors
                 are wrong for programs written in high-level languages
                 (e.g., Java): they miss true data races and report
                 false data races in these programs. To bring the
                 benefits of low-level data-race detection to high-level
                 languages, we design low-level abstractable race
                 detection (LARD), an extension of the interface between
                 low-level data-race detectors and run-time systems that
                 enables accurate language-level data-race detection
                 using low-level detection mechanisms. We implement
                 accurate LARD data-race exception support for Java,
                 coupling a modified Jikes RVM Java virtual machine and
                 a simulated hardware race detector. We evaluate our
                 detector's accuracy against an accurate dynamic Java
                 data-race detector and other low-level race detectors
                 without LARD, showing that naive accurate low-level
                 data-race detectors suffer from many missed and false
                 language-level races in practice, and that LARD
                 prevents this inaccuracy.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Zhang:2014:EES,
  author =       "Jiaqi Zhang and Lakshminarayanan Renganarayana and
                 Xiaolan Zhang and Niyu Ge and Vasanth Bala and Tianyin
                 Xu and Yuanyuan Zhou",
  title =        "{EnCore}: exploiting system environment and
                 correlation information for misconfiguration
                 detection",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "1",
  pages =        "687--700",
  month =        mar,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2654822.2541983",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Sep 4 07:12:13 MDT 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "As software systems become more complex and
                 configurable, failures due to misconfigurations are
                 becoming a critical problem. Such failures often have
                 serious functionality, security and financial
                 consequences. Further, diagnosis and remediation for
                 such failures require reasoning across the software
                 stack and its operating environment, making it
                 difficult and costly. We present a framework and tool
                 called EnCore to automatically detect software
                 misconfigurations. EnCore takes into account two
                 important factors that are unexploited before: the
                 interaction between the configuration settings and the
                 executing environment, as well as the rich correlations
                 between configuration entries. We embrace the emerging
                 trend of viewing systems as data, and exploit this to
                 extract information about the execution environment in
                 which a configuration setting is used. EnCore learns
                 configuration rules from a given set of sample
                 configurations. With training data enriched with the
                 execution context of configurations, EnCore is able to
                 learn a broad set of configuration anomalies that spans
                 the entire system. EnCore is effective in detecting
                 both injected errors and known real-world problems ---
                 it finds 37 new misconfigurations in Amazon EC2 public
                 images and 24 new configuration problems in a
                 commercial private cloud. By systematically exploiting
                 environment information and by learning correlation
                 rules across multiple configuration settings, EnCore
                 detects 1.6x to 3.5x more misconfiguration anomalies
                 than previous approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS '14 conference proceedings.",
}
@Article{Voskuilen:2014:HPF,
author = "Gwendolyn Voskuilen and T. N. Vijaykumar",
title = "High-performance fractal coherence",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "701--714",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541982",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Bugs in cache coherence protocols can cause system
failures. Despite many advances, verification runs into
state explosion for even moderately-sized systems. As
multicores' core counts increase, coherence
verifiability continues to be a key problem. A recent
proposal, called fractal coherence, avoids the state
explosion problem by applying the idea of observational
equivalence between a larger system and its smaller
sub-systems. A fractal protocol for a larger system is
verified by design if a minimal sub-system is verified
completely. While fractal coherence is a significant
step forward, there are two shortcomings: (1)
Architectural limitation: To achieve fractal
coherence's logical hierarchy, TreeFractal, the
specific fractal protocol, employs a tree architecture
where each miss traverses many levels up and down the
tree and each level redundantly holds its sub-trees'
coherence tags. (2) Protocol restrictions: TreeFractal
imposes a restriction on responses to read requests
that forces read requests to obtain clean blocks from
the nearest sharer even if the shared L2 or L3 is
faster. These limitations impose significant
performance and coherence tag state overheads. In this
paper, we propose architectural support for coherence
protocols to achieve scalable performance and
verifiability. To address the architectural limitation,
we propose FlatFractal, a directory-based architecture
which decouples fractal coherence's logical hierarchy
from the architecture and eliminates redundant tag
state. To address the protocol restriction, we propose
a simple change to the protocol that, while preserving
observational equivalence, allows read requests to
obtain the blocks from the shared L2 or L3. Our
simulations show that for 16 cores, FlatFractal
performs, on average, 57\% better than TreeFractal and
within 3\% of a conventional directory.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Kwon:2014:LOC,
author = "Woo-Cheol Kwon and Tushar Krishna and Li-Shiuan Peh",
title = "Locality-oblivious cache organization leveraging
single-cycle multi-hop {NoCs}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "715--728",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541976",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Locality has always been a critical factor in on-chip
data placement on CMPs as accessing further-away caches
has in the past been more costly than accessing nearby
ones. Substantial research on locality-aware designs
have thus focused on keeping a copy of the data
private. However, this complicates the problem of data
tracking and search/invalidation; tracking the state of
a line at all on-chip caches at a directory or
performing full-chip broadcasts are both non-scalable
and extremely expensive solutions. In this paper, we
make the case for Locality-Oblivious Cache Organization
(LOCO), a CMP cache organization that leverages the
on-chip network to create virtual single-cycle paths
between distant caches, thus redefining the notion of
locality. LOCO is a clustered cache organization,
supporting both homogeneous and heterogeneous cluster
sizes, and provides near single-cycle accesses to data
anywhere within the cluster, just like a private cache.
Globally, LOCO dynamically creates a virtual mesh
connecting all the clusters, and performs an efficient
global data search and migration over this virtual
mesh, without having to resort to full-chip broadcasts
or perform expensive directory lookups. Trace-driven
and full system simulations running SPLASH-2 and PARSEC
benchmarks show that LOCO improves application run time
by up to 44.5\% over baseline private and shared
cache.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Kasture:2014:UEC,
author = "Harshad Kasture and Daniel Sanchez",
title = "{Ubik}: efficient cache sharing with strict {QoS} for
latency-critical workloads",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "729--742",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541944",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Chip-multiprocessors (CMPs) must often execute
workload mixes with different performance requirements.
On one hand, user-facing, latency-critical applications
(e.g., web search) need low tail (i.e., worst-case)
latencies, often in the millisecond range, and have
inherently low utilization. On the other hand,
compute-intensive batch applications (e.g., MapReduce)
only need high long-term average performance. In
current CMPs, latency-critical and batch applications
cannot run concurrently due to interference on shared
resources. Unfortunately, prior work on quality of
service (QoS) in CMPs has focused on guaranteeing
average performance, not tail latency. In this work, we
analyze several latency-critical workloads, and show
that guaranteeing average performance is insufficient
to maintain low tail latency, because
microarchitectural resources with state, such as caches
or cores, exert inertia on instantaneous workload
performance. Last-level caches impart the highest
inertia, as workloads take tens of milliseconds to warm
them up. When left unmanaged, or when managed with
conventional QoS frameworks, shared last-level caches
degrade tail latency significantly. Instead, we propose
Ubik, a dynamic partitioning technique that predicts
and exploits the transient behavior of latency-critical
workloads to maintain their tail latency while
maximizing the cache space available to batch
applications. Using extensive simulations, we show
that, while conventional QoS frameworks degrade tail
latency by up to 2.3x, Ubik simultaneously maintains
the tail latency of latency-critical workloads and
significantly improves the performance of batch
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Pichai:2014:ASA,
author = "Bharath Pichai and Lisa Hsu and Abhishek
Bhattacharjee",
title = "Architectural support for address translation on
{GPUs}: designing memory management units for
{CPU\slash GPUs} with unified address spaces",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "1",
pages = "743--758",
month = mar,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2654822.2541942",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Sep 4 07:12:13 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The proliferation of heterogeneous compute platforms,
of which CPU/GPU is a prevalent example, necessitates a
manageable programming model to ensure widespread
adoption. A key component of this is a shared unified
address space between the heterogeneous units to obtain
the programmability benefits of virtual memory. To this
end, we are the first to explore GPU Memory Management
Units (MMUs) consisting of Translation Lookaside Buffers
(TLBs) and page table walkers (PTWs) for address
translation in unified heterogeneous systems. We show
the performance challenges posed by GPU warp schedulers
on TLBs accessed in parallel with L1 caches, which
provide many well-known programmability benefits. In
response, we propose modest TLB and PTW augmentations
that recover most of the performance lost by
introducing L1 parallel TLB access. We also show that a
little TLB-awareness can make other GPU performance
enhancements (e.g., cache-conscious warp scheduling and
dynamic warp formation on branch divergence) feasible
in the face of cache-parallel address translation,
bringing overheads in the range deemed acceptable for
CPUs (10--15\% of runtime). We presume this initial
design leaves room for improvement but anticipate that
our bigger insight, that a little TLB-awareness goes a
long way in GPUs, will spur further work in this
fruitful area.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS '14 conference proceedings.",
}
@Article{Mondal:2014:DSM,
author = "Subijit Mondal and Subhashis Maitra",
title = "Data security-modified {AES} algorithm and its
applications",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "2",
pages = "1--8",
month = may,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2669594.2669596",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 15 16:43:20 MDT 2014",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Now a days with the rapid development of multimedia
technologies, research on safety and security are
becoming more important. Multimedia data are generated
and transmitted through the communication channels and
the wireless media. The efficiencies of encryption
based on different existing algorithms are not up to
the satisfactory limit. Hence researchers are trying to
modify the existing algorithm or even develop new
algorithms that help to increase security with a little
encryption time. Here in this paper, we have furnished
a new technology to modify the AES algorithm which
gives more security with a little encryption time and
which can be used to encrypt using 128-bit key.
Theoretical analysis on the proposed algorithm with the
existing reveals the novelty of our work. Here we have
proposed a technique to randomize the key and hidden
the key data into an encrypted digital image using the
basics concept of cryptography and also using the
concept of digital watermarking, the concept of
key-hide has also been encrypted. We have also proposed
a new technique to reposition the pixels to break the
correlation between them. So, the proposed scheme
offers a more secure and cost effective mechanism for
encryption.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sen:2014:TLT,
author = "Soumik Sen and Subhashis Maitra",
title = "Three levels three dimensional compact coding",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "2",
pages = "9--14",
month = may,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2669594.2669597",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 15 16:43:20 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware and timing complexities are the major issues
in current security related algorithms. Some of them
shows better efficiency with respect to time and some
of them reduce hardware complexities. Researchers try
to solve both the problem at the same time in an
efficient way. There are different existing algorithms
which prove this efficiency. Here we will propose a new
algorithm named as ``Three Levels Three Dimensional
Compact Coding (TLTDCC)'' which will show better
response time as well as it requires less hardware and
also in security aspect, it will provide higher
security. This paper explores a novelty of the work
through a comparative study of the proposed algorithm
with respect to different existing algorithms both in
tabular method and graphically.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thomasian:2014:BDA,
author = "Alexander Thomasian and Bingxing Liu and Yuhui Deng",
title = "Balancing disk access times in {RAID5} disk arrays in
degraded mode by conditionally prioritizing fork\slash
join requests",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "2",
pages = "15--19",
month = may,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2669594.2669598",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 15 16:43:20 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "RAID5 disk arrays with rotated parities can tolerate
single disk failures by reconstructing missing blocks
on demand by XORing the contents of corresponding $K$
blocks on surviving disks by a $K$-way Fork/Join (F/J)
request, which is considered completed after the $K$
disks are accessed. $ F / J$ accesses in RAID5 are
processed concurrently with interfering disk accesses.
The mean response time of F/J and
independent/interfering requests: $ R^{F / J} /_K$
and $ R^{\rm Ind}$ and the mean delay from the
completion of the first to the last $ F / J$ task,
known as task dispersion time: $ T^{\rm disp} /_K$, are
performance metrics of interest. Given $ R^{F / J} /_K
> R^{\rm Ind}$ with FCFS scheduling, it is desirable to
equalize disk access times, but giving a higher
nonpreemptive priority to disk accesses due to $ F / J$
requests with respect to interfering disk accesses
results in $ R^{\rm Ind}$ \& $ R^{F / J} /_K$. We
propose a continuum of conditional priority methods
based on the fraction $F$ of $ F / J$ accesses
completed with FCFS scheduling. $ F = \infty $ stands
for FCFS and $ F = 0$ stands for unconditional
priorities. Simulation shows that $ F = 1 / 8$ with $ K
= 8$ yields $ R^{F / J} /_K \approx R^{\rm Ind}$ for three
distributions of disk requests and in the range of $ F
/ J$ and independent disk requests considered. $F$ can
be varied adaptively based on measurement results to
balance disk access times.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gandhi:2014:BTI,
author = "Jayneel Gandhi and Arkaprava Basu and Mark D. Hill and
Michael M. Swift",
title = "{BadgerTrap}: a tool to instrument x86-64 {TLB}
misses",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "2",
pages = "20--23",
month = may,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2669594.2669599",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 15 16:43:20 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The overheads of memory management units (MMUs) have
gained importance in today's systems. Detailed
simulators may be too slow to gain insights into
micro-architectural techniques that improve MMU
efficiency. To address this issue, we propose a novel
tool, BadgerTrap, which allows online instrumentation
of TLB misses. It allows first-order analysis of new
hardware techniques to improve MMU efficiency. The tool
helps to create and analyze x86-64 TLB miss trace. We
describe example studies to show various ways this tool
can be applied to gain new research insights.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2014:INa,
author = "Mark Thorson",
title = "{Internet} nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "2",
pages = "24--36",
month = may,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2669594.2669601",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Sep 15 16:43:20 MDT 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Towles:2014:UCI,
author = "Brian Towles and J. P. Grossman and Brian Greskamp and
David E. Shaw",
title = "Unifying on-chip and inter-node switching within the
{Anton 2} network",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "1--12",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665677",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The design of network architectures has become
increasingly complex as the chips connected by
inter-node networks have emerged as distributed systems
in their own right, complete with their own on-chip
networks. In Anton 2, a massively parallel
special-purpose supercomputer for molecular dynamics
simulations, we managed this complexity by reusing the
on-chip network as a switch for inter-node traffic.
This unified network approach introduces several design
challenges. Maintaining fairness within the inter-node
network is difficult, as each hop becomes a sequence of
many on-chip routing decisions. We addressed this
problem with an inverse-weighted arbiter that ensures
fairness with low implementation costs. Balancing the
load of inter-node traffic across the on-chip network
is also critical, and we adopted an optimization
approach to design an appropriate routing algorithm.
Finally, the on-chip routers carry inter-node traffic,
so they must implement inter-node virtual channels to
avoid deadlock. In order to keep the routers small and
fast, we developed a deadlock-free routing algorithm
that reduces the number of virtual channels by
one-third relative to previous approaches. The
resulting Anton 2 network implementation efficiently
utilizes its inter-node channels and provides low
messaging latency, while occupying a modest amount of
silicon area.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Putnam:2014:RFA,
author = "Andrew Putnam and Adrian M. Caulfield and Eric S.
Chung and Derek Chiou and Kypros Constantinides and
John Demme and Hadi Esmaeilzadeh and Jeremy Fowers and
Gopi Prashanth Gopal and Jan Gray and Michael Haselman
and Scott Hauck and Stephen Heil and Amir Hormati and
Joo-Young Kim and Sitaram Lanka and James Larus and
Eric Peterson and Simon Pope and Aaron Smith and Jason
Thong and Phillip Yi Xiao and Doug Burger",
title = "A reconfigurable fabric for accelerating large-scale
datacenter services",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "13--24",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665678",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Datacenter workloads demand high computational
capabilities, flexibility, power efficiency, and low
cost. It is challenging to improve all of these factors
simultaneously. To advance datacenter capabilities
beyond what commodity server designs can provide, we
have designed and built a composable, reconfigurable
fabric to accelerate portions of large-scale software
services. Each instantiation of the fabric consists of
a 6x8 2-D torus of high-end Stratix V FPGAs embedded
into a half-rack of 48 machines. One FPGA is placed
into each server, accessible through PCIe, and wired
directly to other FPGAs with pairs of 10 Gb SAS cables.
In this paper, we describe a medium-scale deployment of
this fabric on a bed of 1,632 servers, and measure its
efficacy in accelerating the Bing web search engine. We
describe the requirements and architecture of the
system, detail the critical engineering challenges and
solutions needed to make the system robust in the
presence of failures, and measure the performance,
power, and resilience of the system when ranking
candidate documents. Under high load, the large-scale
reconfigurable fabric improves the ranking throughput
of each server by a factor of 95\% for a fixed latency
distribution --- or, while maintaining equivalent
throughput, reduces the tail latency by 29\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Daya:2014:SCR,
author = "Bhavya K. Daya and Chia-Hsin Owen Chen and Suvinay
Subramanian and Woo-Cheol Kwon and Sunghyun Park and
Tushar Krishna and Jim Holt and Anantha P. Chandrakasan
and Li-Shiuan Peh",
title = "{SCORPIO}: a $ 36$-core research chip demonstrating
snoopy coherence on a scalable mesh {NoC} with
in-network ordering",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "25--36",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665680",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the many-core era, scalable coherence and on-chip
interconnects are crucial for shared memory processors.
While snoopy coherence is common in small multicore
systems, directory-based coherence is the de facto
choice for scalability to many cores, as snoopy relies
on ordered interconnects which do not scale. However,
directory-based coherence does not scale beyond tens of
cores due to excessive directory area overhead or
inaccurate sharer tracking. Prior techniques supporting
ordering on arbitrary unordered networks are
impractical for full multicore chip designs. We present
SCORPIO, an ordered mesh Network-on-Chip (NoC)
architecture with a separate fixed-latency, bufferless
network to achieve distributed global ordering. Message
delivery is decoupled from the ordering, allowing
messages to arrive in any order and at any time, and
still be correctly ordered. The architecture is
designed to plug-and-play with existing multicore IP
and with practicality, timing, area, and power as top
concerns. Full-system 36 and 64-core simulations on
SPLASH-2 and PARSEC benchmarks show an average
application runtime reduction of 24.1\% and 12.9\%, in
comparison to distributed directory and AMD
HyperTransport coherence protocols, respectively. The
SCORPIO architecture is incorporated in an 11
mm-by-13 mm chip prototype, fabricated in IBM 45nm SOI
technology, comprising 36 Freescale e200 Power
Architecture\TM{} cores with private L1 and L2 caches
interfacing with the NoC via ARM AMBA, along with two
Cadence on-chip DDR2 controllers. The chip prototype
achieves a post synthesis operating frequency of 1 GHz
(833MHz post-layout) with an estimated power of 28.8W
(768mW per tile), while the network consumes only 10\%
of tile area and 19\% of tile power.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Upasani:2014:ACD,
author = "Gaurang Upasani and Xavier Vera and Antonio
Gonz{\'a}lez",
title = "Avoiding core's {DUE \& SDC} via acoustic wave
detectors and tailored error containment and recovery",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "37--48",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665682",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The trend of downsizing transistors and operating
voltage scaling has made the processor chip more
sensitive against radiation phenomena making soft
errors an important challenge. New reliability
techniques for handling soft errors in the logic and
memories that allow meeting the desired
failures-in-time (FIT) target are key to keep
harnessing the benefits of Moore's law. The failure to
scale the soft error rate caused by particle strikes,
may soon limit the total number of cores that one may
have running at the same time. This paper proposes a
light-weight and scalable architecture to eliminate
silent data corruption errors (SDC) and detected
unrecoverable errors (DUE) of a core. The architecture
uses acoustic wave detectors for error detection. We
propose to recover by confining the errors in the cache
hierarchy, allowing us to deal with the relatively long
detection latencies. Our results show that the proposed
mechanism protects the whole core (logic, latches and
memory arrays) incurring performance overhead as low as
0.60\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Chen:2014:MLC,
author = "Long Chen and Zhao Zhang",
title = "{MemGuard}: a low cost and energy efficient design to
support and enhance memory system reliability",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "49--60",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665683",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory system reliability is increasingly a concern as
memory cell density and capacity continue to grow. The
conventional approach is to use redundant memory bits
for error detection and correction, with significant
storage, cost and power overheads. In this paper, we
propose a novel, system-level scheme called MemGuard
for memory error detection. With OS-based
checkpointing, it is also able to recover program
execution from memory errors. The memory error
detection of MemGuard is motivated by memory integrity
verification using log hashes. It is much stronger than
SECDED in error detection, incurs negligible hardware
cost and energy overhead and no storage overhead, and
is compatible with various memory organizations. It may
play the role of ECC memory in consumer-level computers
and mobile devices, without the shortcomings of ECC
memory. In server computers, it may complement SECDED
ECC or Chipkill Correct by providing even stronger
error detection. We have comprehensively investigated
and evaluated the feasibility and reliability of
MemGuard. We show that using an incremental multiset
hash function and a non-cryptographic hash function,
the performance and energy overheads of MemGuard are
negligible. We use the mathematical deduction and
synthetic simulation to prove that MemGuard is robust
and reliable.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Hari:2014:GGE,
author = "Siva Kumar Sastry Hari and Radha Venkatagiri and
Sarita V. Adve and Helia Naeimi",
title = "{GangES}: gang error simulation for hardware
resiliency evaluation",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "61--72",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665685",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As technology scales, the hardware reliability
challenge affects a broad computing market, rendering
traditional redundancy based solutions too expensive.
Software anomaly based hardware error detection has
emerged as a low cost reliability solution, but suffers
from Silent Data Corruptions (SDCs). It is crucial to
accurately evaluate SDC rates and identify SDC
producing software locations to develop
software-centric low-cost hardware resiliency
solutions. A recent tool, called Relyzer,
systematically analyzes an entire application's
resiliency to single bit soft-errors using a small set
of carefully selected error injection sites. Relyzer
provides a practical resiliency evaluation mechanism
but still requires significant evaluation time, most of
which is spent on error simulations. This paper
presents a new technique called GangES (Gang Error
Simulator) that aims to reduce error simulation time.
GangES observes that a set or gang of error simulations
that result in the same intermediate execution state
(after their error injections) will produce the same
error outcome; therefore, only one simulation of the
gang needs to be completed, resulting in significant
overall savings in error simulation time. GangES
leverages program structure to carefully select when to
compare simulations and what state to compare. For our
workloads, GangES saves 57\% of the total error
simulation time with an overhead of just 1.6\%. This
paper also explores pure program analyses based
techniques that could obviate the need for tools such
as GangES altogether. The availability of
Relyzer+GangES allows us to perform a detailed
evaluation of such techniques. We evaluate the accuracy
of several previously proposed program metrics. We find
that the metrics we considered and their various linear
combinations are unable to adequately predict an
instruction's vulnerability to SDCs, further motivating
the use of Relyzer+GangES style techniques as valuable
solutions for the hardware error resiliency evaluation
problem.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Wadden:2014:RWD,
author = "Jack Wadden and Alexander Lyashevsky and Sudhanva
Gurumurthi and Vilas Sridharan and Kevin Skadron",
title = "Real-world design and evaluation of compiler-managed
{GPU} redundant multithreading",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "73--84",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665686",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Reliability for general purpose processing on the GPU
(GPGPU) is becoming a weak link in the construction of
reliable supercomputer systems. Because hardware
protection is expensive to develop, requires dedicated
on-chip resources, and is not portable across different
architectures, the efficiency of software solutions
such as redundant multithreading (RMT) must be
explored. This paper presents a real-world design and
evaluation of automatic software RMT on GPU hardware.
We first describe a compiler pass that automatically
converts GPGPU kernels into redundantly threaded
versions. We then perform detailed power and
performance evaluations of three RMT algorithms, each
of which provides fault coverage to a set of structures
in the GPU. Using real hardware, we show that
compiler-managed software RMT has highly variable
costs. We further analyze the individual costs of
redundant work scheduling, redundant computation, and
inter-thread communication, showing that no single
component in general is responsible for high overheads
across all applications; instead, certain workload
properties tend to cause RMT to perform well or poorly.
Finally, we demonstrate the benefit of architectural
support for RMT with a specific example of fast,
register-level thread communication.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Chen:2014:ARA,
author = "Tianshi Chen and Qi Guo and Ke Tang and Olivier Temam
and Zhiwei Xu and Zhi-Hua Zhou and Yunji Chen",
title = "{ArchRanker}: a ranking approach to design space
exploration",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "85--96",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665688",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Architectural Design Space Exploration (DSE) is a
notoriously difficult problem due to the exponentially
large size of the design space and long simulation
times. Previously, many studies proposed to formulate
DSE as a regression problem which predicts architecture
responses (e.g., time, power) of a given architectural
configuration. Several of these techniques achieve high
accuracy, though often at the cost of significant
simulation time for training the regression models. We
argue that the information the architect mostly needs
during the DSE process is whether a given configuration
will perform better than another one in the presences
of design constraints, or better than any other one
seen so far, rather than precisely estimating the
performance of that configuration. Based on this
observation, we propose a novel ranking-based approach
to DSE where we train a model to predict which of two
architecture configurations will perform best. We show
that, not only this ranking model more accurately
predicts the relative merit of two architecture
configurations than an ANN-based state-of-the-art
regression model, but also that it requires much fewer
training simulations to achieve the same accuracy, or
that it can be used for and is even better at
quantifying the performance gap between two
configurations. We implement the framework for training
and using this model, called ArchRanker, and we
evaluate it on several DSE scenarios (unicore/multicore
design spaces, and both time and power performance
metrics). We try to emulate as closely as possible the
DSE process by creating constraint-based scenarios, or
an iterative DSE process. We find that ArchRanker makes
29.68\% to 54.43\% fewer incorrect predictions on
pairwise relative merit of configurations (tested with
79,800 configuration pairs) than an ANN-based
regression model across all DSE scenarios considered
(values averaged over all benchmarks for each
scenario). We also find that, to achieve the same
accuracy as ArchRanker, the ANN often requires three
times more training simulations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Shao:2014:APR,
author = "Yakun Sophia Shao and Brandon Reagen and Gu-Yeon Wei
and David Brooks",
title = "{Aladdin}: a {Pre-RTL}, power-performance accelerator
simulator enabling large design space exploration of
customized architectures",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "97--108",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665689",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware specialization, in the form of accelerators
that provide custom datapath and control for specific
algorithms and applications, promises impressive
performance and energy advantages compared to
traditional architectures. Current research in
accelerator analysis relies on RTL-based synthesis
flows to produce accurate timing, power, and area
estimates. Such techniques not only require significant
effort and expertise but are also slow and tedious to
use, making large design space exploration infeasible.
To overcome this problem, we present Aladdin, a
pre-RTL, power-performance accelerator modeling
framework and demonstrate its application to
system-on-chip (SoC) simulation. Aladdin estimates
performance, power, and area of accelerators within
0.9\%, 4.9\%, and 6.6\% with respect to RTL
implementations. Integrated with architecture-level
core and memory hierarchy simulators, Aladdin provides
researchers an approach to model the power and
performance of accelerators in an SoC environment.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Badr:2014:SST,
author = "Mario Badr and Natalie Enright Jerger",
title = "{SynFull}: synthetic traffic models capturing cache
coherent behaviour",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "109--120",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665691",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern and future many-core systems represent complex
architectures. The communication fabrics of these large
systems heavily influence their performance and power
consumption. Current simulation methodologies for
evaluating networks-on-chip (NoCs) are not keeping pace
with the increased complexity of our systems;
architects often want to explore many different design
knobs quickly. Methodologies that capture workload
trends with faster simulation times are highly
beneficial at early stages of architectural
exploration. We propose SynFull, a synthetic traffic
generation methodology that captures both application
and cache coherence behaviour to rapidly evaluate NoCs.
SynFull allows designers to quickly indulge in detailed
performance simulations without the cost of
long-running full-system simulation. By capturing a
full range of application and coherence behaviour,
architects can avoid the over- or under-design of the
network as may occur when using traditional synthetic
traffic patterns such as uniform random. SynFull has
errors as low as 0.3\% and provides 50x speedup on
average over full-system simulation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Venkat:2014:HID,
author = "Ashish Venkat and Dean M. Tullsen",
title = "Harnessing {ISA} diversity: design of a
{heterogeneous-ISA} chip multiprocessor",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "121--132",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665692",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Heterogeneous multicore architectures have the
potential for high performance and energy efficiency.
These architectures may be composed of small
power-efficient cores, large high-performance cores,
and/or specialized cores that accelerate the
performance of a particular class of computation.
Architects have explored multiple dimensions of
heterogeneity, both in terms of micro-architecture and
specialization. While early work constrained the cores
to share a single ISA, this work shows that allowing
heterogeneous ISAs further extends the effectiveness of
such architectures. This work exploits the diversity
offered by three modern ISAs: Thumb, x86-64, and Alpha.
This architecture has the potential to outperform the
best single-ISA heterogeneous architecture by as much
as 21\%, with 23\% energy savings and a reduction of
32\% in Energy Delay Product.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Sembrant:2014:DDD,
author = "Andreas Sembrant and Erik Hagersten and David
Black-Schaffer",
title = "The {Direct-to-Data (D2D)} cache: navigating the cache
hierarchy with a single lookup",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "133--144",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665694",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern processors optimize for cache energy and
performance by employing multiple levels of caching
that address bandwidth, low-latency and high-capacity.
A request typically traverses the cache hierarchy,
level by level, until the data is found, thereby
wasting time and energy in each level. In this paper,
we present the Direct-to-Data (D2D) cache that locates
data across the entire cache hierarchy with a single
lookup. To navigate the cache hierarchy, D2D extends
the TLB with per cache-line location information that
indicates in which cache and way the cache line is
located. This allows the D2D cache to: (1) skip levels
in the hierarchy (by accessing the right cache level
directly), (2) eliminate extra data array reads (by
reading the right way directly), (3) avoid tag
comparisons (by eliminating the tag arrays), and (4) go
directly to DRAM on cache misses (by checking the TLB).
This reduces the L2 latency by 40\% and saves 5--17\% of
the total cache hierarchy energy. D2D's lower L2 latency
directly improves L2 sensitive applications'
performance by 5--14\%. More significantly, we can take
advantage of the L2 latency reduction to optimize other
parts of the micro-architecture. For example, we can
reduce the ROB size for the L2 bound applications by
25\%, or we can reduce the L1 cache size, delivering an
overall 21\% energy savings across all benchmarks,
without hurting performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Arelakis:2014:SSC,
author = "Angelos Arelakis and Per Stenstrom",
title = "{SC2}: a statistical compression cache scheme",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "145--156",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665696",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Low utilization of on-chip cache capacity limits
performance and wastes energy because of the long
latency, limited bandwidth, and energy consumption
associated with off-chip memory accesses. Value
replication is an important source of low capacity
utilization. While prior cache compression techniques
manage to code frequent values densely, they trade off
a high compression ratio for low decompression latency,
thus missing opportunities to utilize capacity more
effectively. This paper presents, for the first time, a
detailed design space exploration of caches that
utilize statistical compression. We show that more
aggressive approaches like Huffman coding, which have
been neglected in the past due to the high processing
overhead for (de)compression, are suitable techniques
for caches and memory. Based on our key observation
that value locality varies little over time and across
applications, we first demonstrate that the overhead of
statistics acquisition for code generation is low
because new encodings are needed rarely, making it
possible to off-load it to software routines. We then
show that the high compression ratio obtained by
Huffman-coding makes it possible to utilize the
performance benefits of 4X larger last-level caches
with about 50\% lower power consumption than such
larger caches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Seshadri:2014:DBI,
author = "Vivek Seshadri and Abhishek Bhowmick and Onur Mutlu
and Phillip B. Gibbons and Michael A. Kozuch and Todd
C. Mowry",
title = "The dirty-block index",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "157--168",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665697",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "On-chip caches maintain multiple pieces of metadata
about each cached block --- e.g., dirty bit, coherence
information, ECC. Traditionally, such metadata for each
block is stored in the corresponding tag entry in the
tag store. While this approach is simple to implement
and scalable, it necessitates a full tag store lookup
for any metadata query --- resulting in high latency
and energy consumption. We find that this approach is
inefficient and inhibits several cache optimizations.
In this work, we propose a new way of organizing the
dirty bit information that enables simpler and more
efficient implementations of several optimizations. In
our proposed approach, we remove the dirty bits from
the tag store and organize it differently in a separate
structure, which we call the Dirty-Block Index (DBI).
The organization of DBI is simple: it consists of
multiple entries, each corresponding to some row in
DRAM. A bit vector in each entry tracks whether or not
each block in the corresponding DRAM row is dirty. We
demonstrate the benefits of DBI by using it to
simultaneously and efficiently implement three
optimizations proposed by prior work: (1) Aggressive
DRAM-aware writeback, (2) Bypassing cache lookups, and
(3) Heterogeneous ECC for clean/dirty blocks. DBI, with
all three optimizations enabled, improves performance
by 31\% compared to the baseline (by 6\% compared to
the best previous mechanism) while reducing overall
cache area cost by 8\% compared to prior approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Liu:2014:GVM,
author = "Lei Liu and Yong Li and Zehan Cui and Yungang Bao and
Mingyu Chen and Chengyong Wu",
title = "Going vertical in memory management: handling
multiplicity by multi-policy",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "169--180",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665698",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many emerging applications from various domains often
exhibit heterogeneous memory characteristics. When
running in combination on parallel platforms, these
applications present a daunting variety of workload
behaviors that challenge the effectiveness of any
memory allocation strategy. Prior partitioning-based or
random memory allocation schemes typically manage only
one level of the memory hierarchy and often target
specific workloads. To handle diverse and dynamically
changing memory and cache allocation needs, we augment
existing ``horizontal'' cache/DRAM bank partitioning
with vertical partitioning and explore the resulting
multi-policy space. We study the performance of these
policies for over 2000 workloads and correlate the
results with application characteristics via a data
mining approach. Based on this correlation we derive
several practical memory allocation rules that we
integrate into a unified multi-policy framework to
guide resources partitioning and coalescing for dynamic
and diverse multiprogrammed/threaded workloads. We
implement our approach in Linux kernel 2.6.32 as a
restructured page indexing system plus a series of
kernel modules. Extensive experiments show that, in
practice, our framework can select proper memory
allocation policy and consistently outperforms the
unmodified Linux kernel, achieving up to 11\%
performance gains compared to prior techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Orr:2014:FGT,
author = "Marc S. Orr and Bradford M. Beckmann and Steven K.
Reinhardt and David A. Wood",
title = "Fine-grain task aggregation and coordination on
{GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "181--192",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665701",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In general-purpose graphics processing unit (GPGPU)
computing, data is processed by concurrent threads
executing the same function. This model, dubbed
single-instruction/multiple-thread (SIMT), requires
programmers to coordinate the synchronous execution of
similar operations across thousands of data elements.
To alleviate this programmer burden, Gaster and Howes
outlined the channel abstraction, which facilitates
dynamically aggregating asynchronously produced
fine-grain work into coarser-grain tasks. However, no
practical implementation has been proposed. To this end,
we propose and evaluate the first channel
implementation. To demonstrate the utility of channels,
we present a case study that maps the fine-grain,
recursive task spawning in the Cilk programming
language to channels by representing it as a flow
graph. To support data-parallel recursion in bounded
memory, we propose a hardware mechanism that allows
wavefronts to yield their execution resources. Through
channels and wavefront yield, we implement four Cilk
benchmarks. We show that Cilk can scale with the GPU
architecture, achieving speedups of as much as 4.3x on
eight compute units.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Tanasic:2014:EPM,
author = "Ivan Tanasic and Isaac Gelado and Javier Cabezas and
Alex Ramirez and Nacho Navarro and Mateo Valero",
title = "Enabling preemptive multiprogramming on {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "193--204",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665702",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "GPUs are being increasingly adopted as compute
accelerators in many domains, spanning environments
from mobile systems to cloud computing. These systems
are usually running multiple applications, from one or
several users. However GPUs do not provide the support
for resource sharing traditionally expected in these
scenarios. Thus, such systems are unable to provide key
multiprogrammed workload requirements, such as
responsiveness, fairness or quality of service. In this
paper, we propose a set of hardware extensions that
allow GPUs to efficiently support multiprogrammed GPU
workloads. We argue for preemptive multitasking and
design two preemption mechanisms that can be used to
implement GPU scheduling policies. We extend the
architecture to allow concurrent execution of GPU
kernels from different user processes and implement a
scheduling policy that dynamically distributes the GPU
cores among concurrently running kernels, according to
their priorities. We extend the NVIDIA GK110 (Kepler)
like GPU architecture with our proposals and evaluate
them on a set of multiprogrammed workloads with up to
eight concurrent processes. Our proposals improve
execution time of high-priority processes by 15.6x, the
average application turnaround time between 1.5x to 2x,
and system fairness up to 3.4x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Voitsechov:2014:SGM,
author = "Dani Voitsechov and Yoav Etsion",
title = "Single-graph multiple flows: energy efficient design
alternative for {GPGPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "205--216",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665703",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present the single-graph multiple-flows (SGMF)
architecture that combines coarse-grain reconfigurable
computing with dynamic dataflow to deliver massive
thread-level parallelism. The CUDA-compatible SGMF
architecture is positioned as an energy efficient
design alternative for GPGPUs. The architecture maps a
compute kernel, represented as a dataflow graph, onto a
coarse-grain reconfigurable fabric composed of a grid
of interconnected functional units. Each unit
dynamically schedules instances of the same static
instruction originating from different CUDA threads.
The dynamically scheduled functional units enable
streaming the data of multiple threads (or graph flows,
in SGMF parlance) through the grid. The combination of
statically mapped instructions and direct communication
between functional units obviate the need for a full
instruction pipeline and a centralized register file,
whose energy overheads burden GPGPU. We show that the
SGMF architecture delivers performance comparable to
that of contemporary GPGPUs while consuming 57\% less
energy on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Campanoni:2014:HRA,
author = "Simone Campanoni and Kevin Brownell and Svilen Kanev
and Timothy M. Jones and Gu-Yeon Wei and David Brooks",
title = "{HELIX-RC}: an architecture-compiler co-design for
automatic parallelization of irregular programs",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "217--228",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665705",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data dependences in sequential programs limit
parallelization because extracted threads cannot run
independently. Although thread-level speculation can
avoid the need for precise dependence analysis,
communication overheads required to synchronize actual
dependences counteract the benefits of parallelization.
To address these challenges, we propose a lightweight
architectural enhancement co-designed with a
parallelizing compiler, which together can decouple
communication from thread execution. Simulations of
these approaches, applied to a processor with 16 Intel
Atom-like cores, show an average of 6.85x performance
speedup for six SPEC CINT2000 benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Smith:2014:EDN,
author = "James E. Smith",
title = "Efficient digital neurons for large scale cortical
architectures",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "229--240",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665707",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Digital neurons are implemented with the goal of
supporting research and development of architectures
which implement the computational paradigm of the
neocortex. Four spiking digital neurons are implemented
at the register transfer level in a manner that permits
side-by-side comparisons. Two of the neurons contain
two stages of exponential decay, one for synapse
conductances and one for membrane potential. The other
two neurons contain only one stage of exponential decay
for membrane potential. The two stage neurons respond
to an input spike with a change in membrane potential
that has a non-infinite leading edge slope; the one
stage neurons exhibit a change in membrane potential
with an abrupt, infinite leading edge slope. This leads
to a behavioral difference when a number of input
spikes occur in very close time proximity. However, the
one stage neurons are as much as a factor of ten more
energy efficient than the two stage neurons, as
measured by the number of dynamic add-equivalent
operations. A new two stage neuron is proposed. This
neuron reduces the number of decay components and
implements decays in both stages via piece-wise linear
approximation. Together, these simplifications yield
two stage neuron behavior with energy efficiency that
is only about a factor of two worse than the simplest
one stage neuron.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Swaminathan:2014:EAS,
author = "Karthik Swaminathan and Huichu Liu and Jack Sampson
and Vijaykrishnan Narayanan",
title = "An examination of the architecture and system-level
tradeoffs of employing steep slope devices in {$3$D}
{CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "241--252",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665709",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "For any given application, there is an optimal
throughput point in the space of per-processor
performance and the number of such processors given to
that application. However, due to thermal, yield, and
other constraints, not all of these optimal points can
plausibly be constructed with a given technology. In
this paper, we look at how emerging steep slope
devices, 3D circuit integration, and trends in process
technology scaling will combine to shift the boundaries
of both attainable performance, and the optimal set of
technologies to employ to achieve it. We propose a
heterogeneous-technology 3D architecture capable of
operating efficiently at an expanded number of points
in this larger design space and devise a heterogeneity
and thermal aware scheduling algorithm to exploit its
potential. Our heterogeneous mapping techniques are
capable of producing speedups ranging from 17\% for a
high end server workloads running at around 90${}^\circ
$C to over 160\% for embedded systems running below
60${}^\circ $C.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Venkatesan:2014:SST,
author = "Rangharajan Venkatesan and Shankar Ganesh
Ramasubramanian and Swagath Venkataramani and Kaushik
Roy and Anand Raghunathan",
title = "{STAG}: spintronic-tape architecture for {GPGPU} cache
hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "253--264",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665710",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "General-purpose Graphics Processing Units (GPGPUs) are
widely used for executing massively parallel workloads
from various application domains. Feeding data to the
hundreds to thousands of cores that current GPGPUs
integrate places great demands on the memory hierarchy,
fueling an ever-increasing demand for on-chip memory.
In this work, we propose STAG, a high density,
energy-efficient GPGPU cache hierarchy design using a
new spintronic memory technology called Domain Wall
Memory (DWM). DWMs inherently offer unprecedented
benefits in density by storing multiple bits in the
domains of a ferromagnetic nanowire, which logically
resembles a bit-serial tape. However, this structure
also leads to a unique challenge that the bits must be
sequentially accessed by performing ``shift''
operations, resulting in variable and potentially
higher access latencies. To address this challenge,
STAG utilizes a number of architectural techniques:
(i) a hybrid cache organization that employs different
DWM bit-cells to realize the different memory arrays
within the GPGPU cache hierarchy, (ii) a clustered,
bit-interleaved organization, in which the bits in a
cache block are spread across a cluster of DWM tapes,
allowing parallel access, (iii) tape head management
policies that predictively configure DWM arrays to
reduce the expected number of shift operations for
subsequent accesses, and (iv) a shift aware promotion
buffer (SaPB), in which accesses to the DWM cache are
predicted based on intra-warp locality, and locations
that would incur a large shift penalty are promoted to
a smaller buffer. Over a wide range of benchmarks from
the Rodinia, ISPASS and Parboil suites, STAG achieves
significant benefits in performance (12.1\% over SRAM
and 5.8\% over STT-MRAM) and energy (3.3X over SRAM and
2.6X over STT-MRAM).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Pelley:2014:MP,
author = "Steven Pelley and Peter M. Chen and Thomas F.
Wenisch",
title = "Memory persistency",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "265--276",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665712",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging nonvolatile memory technologies (NVRAM)
promise the performance of DRAM with the persistence of
disk. However, constraining NVRAM write order,
necessary to ensure recovery correctness, limits NVRAM
write concurrency and degrades throughput. We require
new memory interfaces to minimally describe write
constraints and allow high performance and high
concurrency data structures. These goals strongly
resemble memory consistency. Whereas memory consistency
concerns the order that memory operations are observed
between numerous processors, persistent memory systems
must constrain the order that writes occur with respect
to failure. We introduce memory persistency, a new
approach to designing persistent memory interfaces,
building on memory consistency. Similar to memory
consistency, memory persistency models may be relaxed
to improve performance. We describe the design space of
memory persistency and desirable features that such a
memory system requires. Finally, we introduce several
memory persistency models and evaluate their ability to
expose NVRAM write concurrency using two
implementations of a persistent queue. Our results show
that relaxed persistency models accelerate system
throughput 30-fold by reducing NVRAM write
constraints.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Hoseinzadeh:2014:RAL,
  author          = "Morteza Hoseinzadeh and Mohammad Arjomand and Hamid
                     Sarbazi-Azad",
  title           = "Reducing access latency of {MLC PCMs} through line
                     striping",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "277--288",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665713",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Although phase change memory with multi-bit storage
                     capability (known as MLC PCM) offers a good combination
                     of high bit-density and non-volatility, its performance
                     is severely impacted by the increased read/write
                     latency. Regarding read operation, access latency
                     increases almost linearly with respect to cell density
                     (the number of bits stored in a cell). Since reads are
                     latency critical, they can seriously impact system
                     performance. This paper alleviates the problem of slow
                     reads in the MLC PCM by exploiting a fundamental
                     property of MLC devices: the Most-Significant Bit (MSB)
                     of MLC cells can be read as fast as SLC cells, while
                     reading the Least-Significant Bits (LSBs) is slower. We
                     propose Striped PCM (SPCM), a memory architecture that
                     leverages this property to keep MLC read latency in the
                     order of SLC's. In order to avoid extra writes onto
                     memory cells as a result of striping memory lines, the
                     proposed design uses a pairing write queue to
                     synchronize write-back requests associated with blocks
                     that are paired in striping mode. Our evaluation shows
                     that our design significantly improves the average
                     memory access latency by more than 30\% and IPC by up
                     to 25\% (10\%, on average), with a slight overhead in
                     memory energy (0.7\%) in a 4-core CMP model running
                     memory-intensive benchmarks",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Jung:2014:HHI,
  author          = "Myoungsoo Jung and Wonil Choi and Shekhar Srikantaiah
                     and Joonhyuk Yoo and Mahmut T. Kandemir",
  title           = "{HIOS}: a host interface {I/O} scheduler for solid
                     state disks",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "289--300",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665715",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Garbage collection (GC) and resource contention on I/O
                     buses (channels) are among the critical bottlenecks in
                     Solid State Disks (SSDs) that cannot be easily hidden.
                     Most existing I/O scheduling algorithms in the host
                     interface logic (HIL) of state-of-the-art SSDs are
                     oblivious to such low-level performance bottlenecks in
                     SSDs. As a result, SSDs may violate quality of service
                     (QoS) requirements by not being able to meet the
                     deadlines of I/O requests. In this paper, we propose a
                     novel host interface I/O scheduler that is both
                     GC-aware and QoS-aware. The proposed scheduler
                     redistributes the GC overheads across non-critical I/O
                     requests and reduces channel resource contention. Our
                     experiments with workloads from various application
                     domains reveal that the proposed scheduler reduces the
                     standard deviation for latency over state-of-the-art
                     I/O schedulers used in the HIL by 52.5\%, and the
                     worst-case latency by 86.6\%. In addition, for I/O
                     requests with sizes smaller than a superpage, our
                     proposed scheduler avoids channel resource conflicts
                     and reduces latency by 29.2\% compared to the
                     state-of-the-art",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Lo:2014:TEP,
  author          = "David Lo and Liqun Cheng and Rama Govindaraju and Luiz
                     Andr{\'e} Barroso and Christos Kozyrakis",
  title           = "Towards energy proportionality for large-scale
                     latency-critical workloads",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "301--312",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665718",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Reducing the energy footprint of warehouse-scale
                     computer (WSC) systems is key to their affordability,
                     yet difficult to achieve in practice. The lack of
                     energy proportionality of typical WSC hardware and the
                     fact that important workloads (such as search) require
                     all servers to remain up regardless of traffic
                     intensity renders existing power management techniques
                     ineffective at reducing WSC energy use. We present
                     PEGASUS, a feedback-based controller that significantly
                     improves the energy proportionality of WSC systems, as
                     demonstrated by a real implementation in a Google
                     search cluster. PEGASUS uses request latency statistics
                     to dynamically adjust server power management limits in
                     a fine-grain manner, running each server just fast
                     enough to meet global service-level latency objectives.
                     In large cluster experiments, PEGASUS reduces power
                     consumption by up to 20\%. We also estimate that a
                     distributed version of PEGASUS can nearly double these
                     savings",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Liu:2014:SRJ,
  author          = "Yanpei Liu and Stark C. Draper and Nam Sung Kim",
  title           = "{SleepScale}: runtime joint speed scaling and sleep
                     states management for power efficient data centers",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "313--324",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665719",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Power consumption in data centers has been growing
                     significantly in recent years. To reduce power, servers
                     are being equipped with increasingly sophisticated
                     power management mechanisms. Different mechanisms offer
                     dramatically different trade-offs between power savings
                     and performance penalties. Considering the complexity,
                     variety, and temporally varying nature of the
                     applications hosted in a typical data center,
                     intelligently determining which power management policy
                     to use and when is a complicated task. In this paper we
                     analyze a system model featuring both performance
                     scaling and low-power states. We reveal the interplay
                     between performance scaling and low-power states via
                     intensive simulation and analytic verification. Based
                     on the observations, we present SleepScale, a runtime
                     power management tool designed to efficiently exploit
                     existing power control mechanisms. At run time,
                     SleepScale characterizes power consumption and
                     quality-of-service (QoS) for each low-power state and
                     frequency setting, and selects the best policy for a
                     given QoS constraint. We evaluate SleepScale using
                     workload traces from data centers and achieve
                     significant power savings relative to conventional
                     power management strategies",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Liu:2014:OVM,
  author          = "Ming Liu and Tao Li",
  title           = "Optimizing virtual machine consolidation performance
                     on {NUMA} server architecture for cloud workloads",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "325--336",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665720",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
                     https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract        = "Server virtualization and workload consolidation
                     enable multiple workloads to share a single physical
                     server, resulting in significant energy savings and
                     utilization improvements. The shift of physical server
                     architectures to NUMA and the increasing popularity of
                     scale-out cloud applications undermine workload
                     consolidation efficiency and result in overall system
                     degradation. In this work, we characterize the
                     consolidation of cloud workloads on NUMA virtualized
                     systems, estimate four different sources of
                     architecture overhead, and explore optimization
                     opportunities beyond the default NUMA-aware hypervisor
                     memory management. Motivated by the observed
                     architectural impact on cloud workload consolidation
                     performance, we propose three optimization techniques
                     incorporating NUMA access overhead into the
                     hypervisor's virtual machine memory allocation and page
                     fault handling routines. Among these, estimation of the
                     memory zone access overhead serves as a foundation for
                     the other two techniques: a NUMA overhead aware buddy
                     allocator and a P2M swap FIFO. Cache hit rate, cycle
                     loss due to cache miss, and IPC serve as indicators to
                     estimate the access cost of each memory node. Our
                     optimized buddy allocator dynamically selects
                     low-overhead memory zones and ``proportionally''
                     distributes memory pages across target nodes. The P2M
                     swap FIFO records recently unused PFN, MFN lists for
                     mapping exchanges to rebalance memory access pressure
                     within one domain. Our real system based evaluations
                     show a 41.1\% performance improvement when
                     consolidating 16-VMs on a 4-socket server (the proposed
                     allocator contributes 22.8\% of the performance gain
                     and the P2M swap FIFO accounts for the rest).
                     Furthermore, our techniques can cooperate well with
                     other methods (i.e. vCPU migration) and scale well when
                     varying VM memory size and the number of sockets in a
                     physical host",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{O:2014:RBD,
  author          = "Seongil O and Young Hoon Son and Nam Sung Kim and Jung
                     Ho Ahn",
  title           = "Row-buffer decoupling: a case for low-latency {DRAM}
                     microarchitecture",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "337--348",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665723",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Modern DRAM devices for the main memory are structured
                     to have multiple banks to satisfy ever-increasing
                     throughput, energy-efficiency, and capacity demands.
                     Due to tight cost constraints, only one row can be
                     buffered (opened) per bank and actively service
                     requests at a time, while the row must be deactivated
                     (closed) before a new row is stored into the row
                     buffers. Hasty deactivation unnecessarily re-opens rows
                     for otherwise row-buffer hits while hindsight
                     accompanies the deactivation process on the critical
                     path of accessing data for row-buffer misses. The time
                     to (de)activate a row is comparable to the time to read
                     an open row while applications are often sensitive to
                     DRAM latency. Hence, it is critical to make the right
                     decision on when to close a row. However, the
                     increasing number of banks per DRAM device over
                     generations reduces the number of requests per bank.
                     This forces a memory controller to frequently predict
                     when to close a row due to a lack of information on
                     future requests, while the dynamic nature of memory
                     access patterns limits the prediction accuracy. In this
                     paper, we propose a novel DRAM microarchitecture that
                     can eliminate the need for any prediction. First, we
                     identify that precharging the bitlines dominates the
                     deactivate time, while sense amplifiers that work as a
                     row buffer are physically coupled with the bitlines
                     such that a single command precharges both bitlines and
                     sense amplifiers simultaneously. By decoupling the
                     bitlines from the row buffers using isolation
                     transistors, the bitlines can be precharged right after
                     a row becomes activated. Therefore, only the sense
                     amplifiers need to be precharged for a miss in most
                     cases, taking an order of magnitude shorter time than
                     the conventional deactivation process. Second, we show
                     that this row-buffer decoupling enables internal DRAM
                     $\mu$-operations to be separated and recombined, which
                     can be exploited by memory controllers to make the main
                     memory system more energy efficient. Our experiments
                     demonstrate that row-buffer decoupling improves the
                     geometric mean of the instructions per cycle and
                     MIPS$^2$/W by 14\% and 29\%, respectively, for
                     memory-intensive SPEC CPU2006 applications",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Zhang:2014:HDH,
  author          = "Tao Zhang and Ke Chen and Cong Xu and Guangyu Sun and
                     Tao Wang and Yuan Xie",
  title           = "{Half-DRAM}: a high-bandwidth and low-power {DRAM}
                     architecture from the rethinking of fine-grained
                     activation",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "349--360",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665724",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "DRAM memory is a major contributor for the total power
                     consumption in modern computing systems. Consequently,
                     power reduction for DRAM memory is critical to improve
                     system-level power efficiency. Fine-grained DRAM
                     architecture [1, 2] has been proposed to reduce the
                     activation/precharge power. However, those prior work
                     either incurs significant performance degradation or
                     introduces large area overhead. In this paper, we
                     propose a novel memory architecture Half-DRAM, in which
                     the DRAM array is reorganized to enable only half of a
                     row being activated. The half-row activation can
                     effectively reduce activation power and meanwhile
                     sustain the full bandwidth one bank can provide. In
                     addition, the half-row activation in Half-DRAM relaxes
                     the power constraint in DRAM, and opens up
                     opportunities for further performance gain.
                     Furthermore, two half-row accesses can be issued in
                     parallel by integrating the sub-array level parallelism
                     to improve the memory level parallelism. The
                     experimental results show that Half-DRAM can achieve
                     both significant performance improvement and power
                     reduction, with negligible design overhead",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Kim:2014:FBM,
  author          = "Yoongu Kim and Ross Daly and Jeremie Kim and Chris
                     Fallin and Ji Hye Lee and Donghyuk Lee and Chris
                     Wilkerson and Konrad Lai and Onur Mutlu",
  title           = "Flipping bits in memory without accessing them: an
                     experimental study of {DRAM} disturbance errors",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "361--372",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665726",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Memory isolation is a key property of a reliable and
                     secure computing system --- an access to one memory
                     address should not have unintended side effects on data
                     stored in other addresses. However, as DRAM process
                     technology scales down to smaller dimensions, it
                     becomes more difficult to prevent DRAM cells from
                     electrically interacting with each other. In this
                     paper, we expose the vulnerability of commodity DRAM
                     chips to disturbance errors. By reading from the same
                     address in DRAM, we show that it is possible to corrupt
                     data in nearby addresses. More specifically, activating
                     the same row in DRAM corrupts data in nearby rows. We
                     demonstrate this phenomenon on Intel and AMD systems
                     using a malicious program that generates many DRAM
                     accesses. We induce errors in most DRAM modules (110
                     out of 129) from three major DRAM manufacturers. From
                     this we conclude that many deployed systems are likely
                     to be at risk. We identify the root cause of
                     disturbance errors as the repeated toggling of a DRAM
                     row's wordline, which stresses inter-cell coupling
                     effects that accelerate charge leakage from nearby
                     rows. We provide an extensive characterization study of
                     disturbance errors and their behavior using an
                     FPGA-based testing platform. Among our key findings, we
                     show that (i) it takes as few as 139K accesses to
                     induce an error and (ii) up to one in every 1.7K cells
                     is susceptible to errors. After examining various
                     potential ways of addressing the problem, we propose a
                     low-overhead solution to prevent the errors",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Zhang:2014:AIP,
  author          = "Runjie Zhang and Ke Wang and Brett H. Meyer and Mircea
                     R. Stan and Kevin Skadron",
  title           = "Architecture implications of pads as a scarce
                     resource",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "373--384",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665728",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Due to non-ideal technology scaling, delivering a
                     stable supply voltage is increasingly challenging.
                     Furthermore, competition for limited chip interface
                     resources (i.e., C4 pads) between power supply and I/O,
                     and the loss of such resources to electromigration,
                     means that constructing a power delivery network (PDN)
                     that satisfies noise margins without compromising
                     performance is and will remain a critical problem for
                     architects and circuit designers alike. Simple
                     guardbanding will no longer work, as the consequent
                     performance penalty will grow with technology scaling.
                     In this paper, we develop a pre-RTL PDN model,
                     VoltSpot, for the purpose of studying the performance
                     and noise tradeoffs among power supply and I/O pad
                     allocation, the effectiveness of noise mitigation
                     techniques, and the consequent implications of
                     electromigration-induced PDN pad failure. Our
                     simulations demonstrate that, despite their integral
                     role in the PDN, power/ground pads can be aggressively
                     reduced (by conversion into I/O pads) to their
                     electromigration limit with minimal performance impact
                     from extra voltage noise --- provided the system
                     implements a suitable noise-mitigation strategy. The
                     key observation is that even though reducing
                     power/ground pads significantly increases the number of
                     voltage emergencies, the average noise amplitude
                     increase is small. Overall, we can triple I/O bandwidth
                     while maintaining target lifetimes and incurring only
                     1.5\% slowdown",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Chen:2014:ICB,
  author          = "Shaoming Chen and Yue Hu and Ying Zhang and Lu Peng
                     and Jesse Ardonne and Samuel Irving and Ashok
                     Srivastava",
  title           = "Increasing off-chip bandwidth in multi-core processors
                     with switchable pins",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "385--396",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665730",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Off-chip memory bandwidth has been considered as one
                     of the major limiting factors to processor performance,
                     especially for multi-cores and many-cores. Conventional
                     processor design allocates a large portion of off-chip
                     pins to deliver power, leaving a small number of pins
                     for processor signal communication. We observed that
                     the processor requires much less power than that can be
                     supplied during memory intensive stages. This is due to
                     the fact that the frequencies of processor cores
                     waiting for data to be fetched from off-chip memories
                     can be scaled down in order to save power without
                     degrading performance. In this work, motivated by this
                     observation, we propose a dynamic pin switch technique
                     to alleviate the bandwidth limitation issue. The
                     technique is introduced to dynamically exploit the
                     surplus pins for power delivery in the memory intensive
                     phases and uses them to provide extra bandwidth for the
                     program executions, thus significantly boosting the
                     performance",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Jiang:2014:LPR,
  author          = "Lei Jiang and Bo Zhao and Jun Yang and Youtao Zhang",
  title           = "A low power and reliable charge pump design for phase
                     change memories",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "397--408",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665731",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "The emerging Phase Change Memory (PCM) technology
                     exhibits excellent scalability and density potentials.
                     At the same time, they require high current and high
                     voltages to switch cell states. Their working voltages
                     are provided by CMOS-compatible on-chip charge pumps
                     (CPs). Unfortunately, CPs and particularly those for
                     RESET, have a large parasitic power (a dominant
                     component in total power loss) during operations, which
                     significantly degrades their energy efficiency. In
                     addition, CPs seriously suffer from the Time-Dependent
                     Dielectric Breakdown (TDDB) problem due to their
                     boosted operation voltage. To maintain a reasonable
                     lifetime of CPs, existing solutions actively switch
                     them on per-operation basis, resulting in large
                     performance degradation. In this paper, we address the
                     above issues through two designs --- Reset\_Sch (RESET
                     scheduling) and CP\_Sch (CP scheduling). Reset\_Sch
                     schedules when to perform a RESET for different cells
                     upon writing a PCM line. It significantly reduces the
                     power loss, and peak working power of RESET CP. CP\_Sch
                     incorporates a fast READ CP design to provide fast
                     charge-up time for reads and minimize performance
                     penalty. Our experimental results show that on average,
                     70\% of power loss for RESET CP can be reduced; and
                     performance loss can be reduced from 16\% to 2\% while
                     achieving a 16\% improvement in reliability",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Voskuilen:2014:FCP,
  author          = "Gwendolyn Voskuilen and T. N. Vijaykumar",
  title           = "{Fractal++}: closing the performance gap between
                     fractal and conventional coherence",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "409--420",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665733",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Cache coherence protocol bugs can cause multicores to
                     fail. Existing coherence verification approaches incur
                     state explosion at small scales or require considerable
                     human effort. As protocols' complexity and multicores'
                     core counts increase, verification continues to be a
                     challenge. Recently, researchers proposed fractal
                     coherence which achieves scalable verification by
                     enforcing observational equivalence between sub-systems
                     in the coherence protocol. A larger subsystem is
                     verified implicitly if a smaller sub-system has been
                     verified. Unfortunately, fractal protocols suffer from
                     two fundamental limitations: (1)
                     indirect-communication: sub-systems cannot directly
                     communicate and (2) partially-serial invalidations:
                     cores must be invalidated in a specific, serial order.
                     These limitations disallow common performance
                     optimizations used by conventional directory protocols:
                     reply forwarding where caches communicate directly and
                     parallel invalidations. Therefore, fractal protocols
                     lack performance scalability while directory protocols
                     lack verification scalability. To enable both
                     performance and verification scalability, we propose
                     Fractal++ which employs a new class of protocol
                     optimizations for verification-constrained
                     architectures: decoupled-replies, contention-hints, and
                     fully-parallel-fractal-invalidations. The first two
                     optimizations allow reply-forwarding-like performance
                     while the third optimization enables parallel
                     invalidations in fractal protocols. Unlike conventional
                     protocols, Fractal++ preserves observational
                     equivalence and hence is scalably verifiable. In
                     32-core simulations of single- and four-socket systems,
                     Fractal++ performs nearly as well as a directory
                     protocol while providing scalable verifiability whereas
                     the best-performing previous fractal protocol performs
                     8\% on average and up to 26\% worse with a
                     single-socket and 12\% on average and up to 34\% worse
                     with a longer-latency multi-socket system",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Qian:2014:ODB,
  author          = "Xuehai Qian and Benjamin Sahelices and Josep
                     Torrellas",
  title           = "{OmniOrder}: directory-based conflict serialization of
                     transactions",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "421--432",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665734",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Effective execution of atomic blocks of instructions
                     (also called transactions) can enhance the performance
                     and programmability of multiprocessors. Atomic blocks
                     can be demarcated in software as in Transactional
                     Memory (TM) or dynamically generated by the hardware as
                     in aggressive implementations of strict memory
                     consistency. In most current designs, when two atomic
                     blocks conflict, one is squashed --- a performance loss
                     that is often unnecessary. To avoid this waste, this
                     paper presents OmniOrder, the first design that
                     efficiently executes conflicting atomic blocks
                     concurrently in a directory-based coherence
                     environment. The idea is to keep only non-speculative
                     data in the caches and, when the cache coherence
                     protocol transfers a line, include in the message the
                     history of speculative updates to the line. The
                     coherence protocol transitions are unmodified. We
                     evaluate OmniOrder with 64-core simulations. In a TM
                     environment, OmniOrder reduces the execution time of
                     the STAMP applications by an average of 18.4\% over a
                     scheme that squashes on conflict. In an environment
                     with SC enforcement with speculation, we run 11
                     programs that implement concurrent algorithms.
                     OmniOrder reduces the programs' execution time by an
                     average of 15.3\% relative to a scheme that squashes on
                     conflict. Finally, OmniOrder's communication overhead
                     of transferring the history of speculative updates is
                     negligible",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Qian:2014:PRR,
  author          = "Xuehai Qian and Benjamin Sahelices and Depei Qian",
  title           = "{Pacifier}: record and replay for relaxed-consistency
                     multiprocessors with distributed directory protocol",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "433--444",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665736",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Record and Deterministic Replay (R\&R) of
                     multithreaded programs on relaxed-consistency
                     multiprocessors with distributed directory protocol has
                     been a long-standing open problem. The independently
                     developed RelaxReplay [8] solves the problem by
                     assuming write atomicity. This paper proposes Pacifier,
                     the first R\&R scheme to provide a solution without
                     assuming write atomicity. R\&R for relaxed-consistency
                     multiprocessors needs to detect, record and replay
                     Sequential Consistency Violations (SCV). Pacifier has
                     two key components: (i) Relog, a general memory
                     reordering logging and replay mechanism that can
                     reproduce SCVs in relaxed memory models, and (ii)
                     Granule, an SCV detection scheme in the record phase
                     with good precision, that indicates whether to record
                     with Relog. We show that Pacifier is a sweet spot in
                     the design space with a reasonable trade-off between
                     hardware and log overhead. An evaluation with
                     simulations of 16, 32 and 64 processors with Release
                     Consistency (RC) running SPLASH-2 applications
                     indicates that Pacifier incurs 3.9\% $\sim$ 16\% larger
                     logs. The slowdown of Pacifier during replay is 10.1\%
                     $\sim$ 30.5\% compared to native execution",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Honarmand:2014:RDL,
  author          = "Nima Honarmand and Josep Torrellas",
  title           = "Replay debugging: leveraging record and replay for
                     program debugging",
  journal         = j-COMP-ARCH-NEWS,
  volume          = "42",
  number          = "3",
  pages           = "445--456",
  month           = jun,
  year            = "2014",
  CODEN           = "CANED2",
  DOI             = "https://doi.org/10.1145/2678373.2665737",
  ISSN            = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L          = "0163-5964",
  bibdate         = "Wed Dec 3 16:18:50 MST 2014",
  bibsource       = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract        = "Hardware-assisted Record and Deterministic Replay
                     (RnR) of programs has been proposed as a primitive for
                     debugging hard-to-repeat software bugs. However, simply
                     providing support for repeatedly stumbling on the same
                     bug does not help diagnose it. For bug diagnosis,
                     developers typically want to modify the code, e.g., by
                     creating and operating on new variables, or printing
                     state. Unfortunately, this renders the RnR log
                     inconsistent and makes Replay Debugging (i.e.,
                     debugging while using an RnR log for replay) dicey at
                     best. This paper presents rdb, the first scheme for
                     replay debugging that guarantees exact replay. rdb
                     relies on two mechanisms. The first one is compiler
                     support to split the instrumented application into two
                     executables: one that is identical to the original
                     program binary, and another that encapsulates all the
                     added debug code. The second mechanism is a runtime
                     infrastructure that replays the application and,
                     without affecting it in any way, invokes the
                     appropriate debug code at the appropriate locations. We
                     describe an implementation of rdb based on LLVM and
                     Pin, and show an example of how rdb's replay debugging
                     helps diagnose a real bug",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGARCH Computer Architecture News",
  journal-URL     = "https://dl.acm.org/loi/sigarch",
  remark          = "ISCA '14 conference proceedings.",
}
@Article{Woodruff:2014:CCM,
  author =       "Jonathan Woodruff and Robert N. M. Watson and David
                 Chisnall and Simon W. Moore and Jonathan Anderson and
                 Brooks Davis and Ben Laurie and Peter G. Neumann and
                 Robert Norton and Michael Roe",
  title =        "The {CHERI} capability model: revisiting {RISC} in an
                 age of risk",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "457--468",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665740",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Motivated by contemporary security challenges, we
                 reevaluate and refine capability-based addressing for
                 the RISC era. We present CHERI, a hybrid capability
                 model that extends the 64-bit MIPS ISA with
                 byte-granularity memory protection. We demonstrate that
                 CHERI enables language memory model enforcement and
                 fault isolation in hardware rather than software, and
                 that the CHERI mechanisms are easily adopted by
                 existing programs for efficient in-program memory
                 safety. In contrast to past capability models, CHERI
                 complements, rather than replaces, the ubiquitous
                 page-based protection mechanism, providing a migration
                 path towards deconflating data-structure protection and
                 OS memory management. Furthermore, CHERI adheres to a
                 strict RISC philosophy: it maintains a load-store
                 architecture and requires only single-cycle
                 instructions, and supplies protection primitives to the
                 compiler, language runtime, and operating system. We
                 demonstrate a mature FPGA implementation that runs the
                 FreeBSD operating system with a full range of software
                 and an open-source application suite compiled with an
                 extended LLVM to use CHERI memory protection. A limit
                 study compares published memory safety mechanisms in
                 terms of instruction count and memory overheads. The
                 study illustrates that CHERI is performance-competitive
                 even while providing assurance and greater flexibility
                 with simpler hardware.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{Vilanova:2014:CPS,
  author =       "Llu{\"\i}s Vilanova and Muli Ben-Yehuda and Nacho
                 Navarro and Yoav Etsion and Mateo Valero",
  title =        "{CODOMs}: protecting software with code-centric memory
                 domains",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "469--480",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665741",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Today's complex software systems are neither secure
                 nor reliable. The rudimentary software protection
                 primitives provided by current hardware forces systems
                 to run many distrusting software components (e.g.,
                 procedures, libraries, plugins, modules) in the same
                 protection domain, or otherwise suffer degraded
                 performance from address space switches. We present
                 CODOMs (COde-centric memory DOMains), a novel
                 architecture that can provide finer-grained isolation
                 between software components with effectively zero
                 run-time overhead, all at a fraction of the complexity
                 of other approaches. An implementation of CODOMs in a
                 cycle-accurate full-system x86 simulator demonstrates
                 that with the right hardware support, finer-grained
                 protection and run-time performance can peacefully
                 coexist.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{Perais:2014:EPW,
  author =       "Arthur Perais and Andr{\'e} Seznec",
  title =        "{EOLE}: paving the way for an effective implementation
                 of value prediction",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "481--492",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665742",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Even in the multicore era, there is a continuous
                 demand to increase the performance of single-threaded
                 applications. However, the conventional path of
                 increasing both issue width and instruction window size
                 inevitably leads to the power wall. Value prediction
                 (VP) was proposed in the mid 90's as an alternative
                 path to further enhance the performance of wide-issue
                 superscalar processors. Still, it was considered up to
                 recently that a performance-effective implementation of
                 Value Prediction would add tremendous complexity and
                 power consumption in almost every stage of the
                 pipeline. Nonetheless, recent work in the field of VP
                 has shown that given an efficient confidence estimation
                 mechanism, prediction validation could be removed from
                 the out-of-order engine and delayed until commit time.
                 As a result, recovering from mispredictions via
                 selective replay can be avoided and a much simpler
                 mechanism --- pipeline squashing --- can be used, while
                 the out-of-order engine remains mostly unmodified. Yet,
                 VP and validation at commit time entails strong
                 constraints on the Physical Register File. Write ports
                 are needed to write predicted results and read ports
                 are needed in order to validate them at commit time,
                 potentially rendering the overall number of ports
                 unbearable. Fortunately, VP also implies that many
                 single-cycle ALU instructions have their operands
                 predicted in the front-end and can be executed
                 in-place, in-order. Similarly, the execution of
                 single-cycle instructions whose result has been
                 predicted can be delayed until commit time since
                 predictions are validated at commit time. Consequently,
                 a significant number of instructions --- 10\% to 60\%
                 in our experiments --- can bypass the out-of-order
                 engine, allowing the reduction of the issue width,
                 which is a major contributor to both out-of-order
                 engine complexity and register file port requirement.
                 This reduction paves the way for a truly practical
                 implementation of Value Prediction. Furthermore, since
                 Value Prediction in itself usually increases
                 performance, our resulting \{Early | Out-of-Order |
                 Late\} Execution architecture, EOLE, is often more
                 efficient than a baseline VP-augmented 6-issue
                 superscalar while having a significantly narrower
                 4-issue out-of-order engine.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{Czechowski:2014:IEE,
  author =       "Kenneth Czechowski and Victor W. Lee and Ed Grochowski
                 and Ronny Ronen and Ronak Singhal and Richard Vuduc and
                 Pradeep Dubey",
  title =        "Improving the energy efficiency of big cores",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "493--504",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665743",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Traditionally, architectural innovations designed to
                 boost single-threaded performance incur overhead costs
                 which significantly increase power consumption. In many
                 cases the increase in power exceeds the improvement in
                 performance, resulting in a net increase in energy
                 consumption. Thus, it is reasonable to assume that
                 modern attempts to improve single-threaded performance
                 will have a negative impact on energy efficiency. This
                 has led to the belief that ``Big Cores'' are inherently
                 inefficient. To the contrary, we present a study which
                 finds that the increased complexity of the core
                 microarchitecture in recent generations of the
                 Intel\reg{} Core\TM{} processor have reduced both the
                 time and energy required to run various workloads.
                 Moreover, taking out the impact of process technology
                 changes, our study still finds the architecture and
                 microarchitecture changes --- such as the increase in
                 SIMD width, addition of the frontend caches, and the
                 enhancement to the out-of-order execution engine ---
                 account for 1.2x improvement in energy efficiency for
                 these processors. This paper provides real-world
                 examples of how architectural innovations can mitigate
                 inefficiencies associated with ``Big Cores'' --- for
                 example, micro-op caches obviate the costly decode of
                 complex x86 instructions --- resulting in a core
                 architecture that is both high performance and energy
                 efficient. It also contributes to the understanding of
                 how microarchitecture affects performance, power and
                 energy efficiency by modeling the relationship between
                 them.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{StAmant:2014:GPC,
  author =       "Ren{\'e}e {St. Amant} and Amir Yazdanbakhsh and Jongse
                 Park and Bradley Thwaites and Hadi Esmaeilzadeh and
                 Arjang Hassibi and Luis Ceze and Doug Burger",
  title =        "General-purpose code acceleration with
                 limited-precision analog computation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "505--516",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665746",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "As improvements in per-transistor speed and energy
                 efficiency diminish, radical departures from
                 conventional approaches are becoming critical to
                 improving the performance and energy efficiency of
                 general-purpose processors. We propose a solution ---
                 from circuit to compiler --- that enables
                 general-purpose use of limited-precision, analog
                 hardware to accelerate ``approximable'' code --- code
                 that can tolerate imprecise execution. We utilize an
                 algorithmic transformation that automatically converts
                 approximable regions of code from a von Neumann model
                 to an ``analog'' neural model. We outline the
                 challenges of taking an analog approach, including
                 restricted-range value encoding, limited precision in
                 computation, circuit inaccuracies, noise, and
                 constraints on supported topologies. We address these
                 limitations with a combination of circuit techniques, a
                 hardware/software interface, neural network training
                 techniques, and compiler support. Analog neural
                 acceleration provides whole application speedup of 3.7x
                 and energy savings of 6.3x with quality loss less than
                 10\% for all except one benchmark. These results show
                 that using limited-precision analog circuits for code
                 acceleration, through a neural approach, is both
                 feasible and beneficial over a range of
                 approximation-tolerant, emerging applications including
                 financial analysis, signal processing, robotics, 3D
                 gaming, compression, and image processing.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
%%% NOTE(review): the abstract of the following entry stops mid-sentence
%%% ("... problems related to the"); it looks truncated and should be
%%% completed from the publisher's record (see the DOI field) -- TODO confirm.
@Article{Madhavan:2014:RLH,
author = "Advait Madhavan and Timothy Sherwood and Dmitri
Strukov",
title = "Race logic: a hardware acceleration for dynamic
programming algorithms",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "3",
pages = "517--528",
month = jun,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2678373.2665747",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose a novel computing approach, dubbed ``Race
Logic'', in which information, instead of being
represented as logic levels, as is done in conventional
logic, is represented as a timing delay. Under this new
information representation, computations can be
performed by observing the relative propagation times
of signals injected into the circuit (i.e. the outcome
of races). Race Logic is especially suited for solving
problems related to the",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '14 conference proceedings.",
}
@Article{Arnau:2014:ERF,
  author =       "Jose-Maria Arnau and Joan-Manuel Parcerisa and
                 Polychronis Xekalakis",
  title =        "Eliminating redundant fragment shader executions on a
                 mobile {GPU} via hardware memoization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "529--540",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665748",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Redundancy is at the heart of graphical applications.
                 In fact, generating an animation typically involves the
                 succession of extremely similar images. In terms of
                 rendering these images, this behavior translates into
                 the creation of many fragment programs with the exact
                 same input data. We have measured this fragment
                 redundancy for a set of commercial Android
                 applications, and found that more than 40\% of the
                 fragments used in a frame have been already computed in
                 a prior frame. In this paper we try to exploit this
                 redundancy, using fragment memoization. Unfortunately,
                 this is not an easy task as most of the redundancy
                 exists across frames, rendering most HW based schemes
                 unfeasible. We thus first take a step back and try to
                 analyze the temporal locality of the redundant
                 fragments, their complexity, and the number of inputs
                 typically seen in fragment programs. The result of our
                 analysis is a task level memoization scheme, that
                 easily outperforms the current state-of-the-art in low
                 power GPUs. More specifically, our experimental results
                 show that our scheme is able to remove 59.7\% of the
                 redundant fragment computations on average. This
                 materializes to a significant speedup of 17.6\% on
                 average, while also improving the overall energy
                 efficiency by 8.9\% on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{Zhu:2014:WAS,
  author =       "Yuhao Zhu and Vijay Janapa Reddi",
  title =        "{WebCore}: architectural support for mobile {Web}
                 browsing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "3",
  pages =        "541--552",
  month =        jun,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2678373.2665749",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The Web browser is undoubtedly the single most
                 important application in the mobile ecosystem. An
                 average user spends 72 minutes each day using the
                 mobile Web browser. Web browser internal engines
                 (e.g., WebKit) are also growing in importance because
                 they provide a common substrate for developing various
                 mobile Web applications. In a user-driven, interactive,
                 and latency-sensitive environment, the browser's
                 performance is crucial. However, the
                 battery-constrained nature of mobile devices limits the
                 performance that we can deliver for mobile Web
                 browsing. As traditional general-purpose techniques to
                 improve performance and energy efficiency fall short,
                 we must employ domain-specific knowledge while still
                 maintaining general-purpose flexibility. In this paper,
                 we first perform design-space exploration to identify
                 appropriate general-purpose architectures that uniquely
                 fit the characteristics of a popular Web browsing
                 engine. Despite our best effort, we discover sources of
                 energy inefficiency in these customized general-purpose
                 architectures. To mitigate these inefficiencies, we
                 propose, synthesize, and evaluate two new
                 domain-specific specializations, called the Style
                 Resolution Unit and the Browser Engine Cache. Our
                 optimizations boost energy efficiency and at the same
                 time improve mobile Web browsing performance. As
                 emerging mobile workloads increasingly rely more on Web
                 browser technologies, the type of optimizations we
                 propose will become important in the future and are
                 likely to have lasting widespread impact.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ISCA '14 conference proceedings.",
}
@Article{Kodama:2014:PFB,
  author =       "Yuetsu Kodama and Toshihiro Hanawa and Taisuke Boku
                 and Mitsuhisa Sato",
  title =        "{PEACH2}: an {FPGA}-based {PCIe} network device for
                 Tightly Coupled Accelerators",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "3--8",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693716",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In recent years, heterogeneous clusters using
                 accelerators are often used for high performance
                 computing systems. In such clusters, inter-node
                 communication between accelerators requires several
                 memory copies via CPU memory, and the communication
                 latency incurred severely reduces performance. To solve
                 this problem, we have been proposing a Tightly Coupled
                 Accelerators (TCA) architecture intended to reduce the
                 communication latency between accelerators over
                 different nodes. In the TCA architecture, PCI Express
                 packets are used for communication among GPUs over
                 nodes. We developed a communication chip that we call
                 the named PEACH2 chip, to help implement the TCA
                 architecture. In this paper, we describe the details of
                 the design and implementation of the PEACH2 chip, with
                 respect to its routing mechanism and its DMA controller
                 using FPGA. We evaluated the PEACH2 on a new platform
                 that uses the latest Xeon CPU, IvyBridge, and achieved
                 2.3 GBytes/sec between GPUs over nodes, while the
                 performance was only 880 MBytes/sec on the previous
                 platform with SandyBridge.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Nomura:2014:PAM,
  author =       "Shimpei Nomura and Takuji Mitsuishi and Jun Suzuki and
                 Yuki Hayashi and Masaki Kan and Hideharu Amano",
  title =        "Performance Analysis of the {Multi-GPU} System with
                 {ExpEther}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "9--14",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693717",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "A GPU cluster in which each node provides a few GPUs
                 connected with PCIe (PCI Express) is commonly used for
                 acceleration of a large application program requiring
                 the performance beyond a single GPU. However, in such a
                 system, programmers are required to describe two
                 parallel programming between nodes in MPIs or other
                 message passing library as well as the fine grained
                 parallel programming for intra-GPUs. As a cost
                 effective alternative of such clusters, we propose a
                 novel multi-GPU system with ExpEther, a virtualization
                 technique which extends PCIe of a host CPU to Ethernet.
                 All devices connected by ExpEther can be treated as if
                 they were directly connected to the host. Evaluation
                 with two application programs with and without GPU-GPU
                 communication revealed that the proposed system with
                 four GPUs achieved 3.88 and 3.29 times performance
                 improvement respectively compared with a single GPU
                 system. Compared with GPU cluster system in which each
                 node provides a GPU, the proposed system achieved about
                 7\% and 30\% performance improvement, respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Watanabe:2014:GAH,
  author =       "Tsuyoshi Watanabe and Naohito Nakasato",
  title =        "{GPU} Accelerated Hybrid Tree Algorithm for Collision
                 Less {$N$}-body Simulations",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "15--20",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693718",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We propose a hybrid tree algorithm for reducing
                 calculation and communication cost of collision-less
                 N-body simulations. The concept of our algorithm is
                 that we split interaction force into two parts:
                 hard-force from neighbor particles and soft-force from
                 distant particles, and applying different time
                 integration for the forces. For hard-force calculation,
                 we can efficiently reduce the calculation and
                 communication cost of the parallel tree code because we
                 only need data of neighbor particles for this part. We
                 implement the algorithm on GPU clusters to accelerate
                 force calculation for both hard and soft force. As the
                 result of implementing the algorithm on GPU clusters,
                 we were able to reduce the communication cost and the
                 total execution time to 40\% and 80\% of that of a
                 normal tree algorithm, respectively. In addition, the
                 reduction factor relative the normal tree algorithm is
                 smaller for large number of processes, and we expect
                 that the execution time can be ultimately reduced down
                 to about 70\% of the normal tree algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Tsuyama:2014:GFA,
  author =       "Haruhisa Tsuyama and Tsutomu Maruyama",
  title =        "{GPU} and {FPGA} Acceleration of Level Set Method",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "21--25",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693719",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The level set method is one of the most powerful image
                 segmentation methods. Its computational complexity,
                 however, is very high, and many approaches to reduce
                 the computation time have been proposed. In this paper,
                 we describe a new level set algorithm for parallel
                 processing, and its implementation on GPU and FPGA. The
                 computational complexity of this algorithm is higher
                 than previous algorithms, but it is possible to achieve
                 higher performance by parallel processing. We
                 implemented the algorithm on GeForce GTX780Ti, and
                 Xilinx XC7VX485T, and compared their performances.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Tanabe:2014:FAO,
  author =       "Yu Tanabe and Tsutomu Maruyama",
  title =        "Fast and Accurate Optical Flow Estimation using
                 {FPGA}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "27--32",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693720",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In this paper, we extend an approach used in the
                 stereo vision for the optical flow estimation to
                 achieve lower error rates. In the optical flow
                 estimation, two dimensional search is required, and
                 more hardware resources becomes necessary than the
                 stereo vision that requires only one dimensional
                 search. In our implementation, the target image is
                 divided into sub-images, and they are processed in turn
                 to reduce the required circuit size. The error rates by
                 our system is much lower than previous works, and its
                 processing speed is fast enough for practical
                 applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Torres-Huitzil:2014:AEI,
  author =       "Cesar Torres-Huitzil and Marco Aurelio
                 Nu{\~n}o-Maganda",
  title =        "Area-time Efficient Implementation of Local Adaptive
                 Image Thresholding in Reconfigurable Hardware",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "33--38",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693721",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Local adaptive thresholding plays an important role in
                 image binarization since it is used to effectively
                 distinguish objects of interest from background
                 regions. This step affects the performance of further
                 processing stages in embedded computer vision
                 applications. In local thresholding, a threshold is
                 defined for each pixel as a function of all pixels
                 within a rectangular neighborhood, and as a
                 consequence, this yields a high computational cost
                 requiring significant processing time when thresholding
                 high resolution images or large data sets. This paper
                 presents an area-time efficient hardware implementation
                 of a local adaptive thresholding technique based on the
                 Bernsen algorithm targeted to a field programmable gate
                 array (FPGA) device. Experimental results show that the
                 proposed implementation is resource efficient and able
                 to process a 1024x1024 gray level image in less than 10
                 milliseconds independent of the neighborhood size. The
                 architecture demonstrates over 100-fold speedup
                 compared to a straightforward software implementation
                 of the original Bernsen algorithm on a desktop
                 computer.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Gohringer:2014:RMS,
  author =       "Diana G{\"o}hringer",
  title =        "Reconfigurable Multiprocessor Systems: Handling
                 {Hydras} Heads --- A Survey",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "39--44",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693722",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Novel solutions are needed to fulfill the increasing
                 demands of embedded systems, i.e. lowering the energy
                 consumption, increasing the performance, reducing the
                 development time and keeping the costs as low as
                 possible. In addition, there exist several
                 applications, which require runtime adaptations of the
                 algorithms based on the connection to its environment.
                 These challenges can be solved by using reconfigurable
                 Multiprocessor Systems-on-Chip (MPSoCs), which can
                 adapt the hardware as well as the software to the
                 application requirements and therefore achieve a high
                 computational efficiency as well as a high flexibility.
                 However, the development, the programming and the
                 operation of such flexible and heterogeneous systems is
                 very complex as the many criteria (Performance, power
                 consumption, costs, development time, runtime
                 adaptations, etc.) open a huge design space. In this
                 paper an overview of the challenges faced when
                 developing runtime adaptive MPSoCs is given. Finally,
                 for each challenge a survey of possible solutions are
                 presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Sano:2014:FBC,
  author =       "Kentaro Sano and Ryotaro Chiba and Tomoya Ueno and
                 Hayato Suzuki and Ryo Ito and Satoru Yamamoto",
  title =        "{FPGA}-based Custom Computing Architecture for
                 Large-Scale Fluid Simulation with Building Cube
                 Method",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "45--50",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693723",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We are designing a custom computing machine for
                 large-scale fluid simulation with the building-cube
                 method (BCM). In BCM, parallel computation is performed
                 with cubes, each of which is an orthogonal grid with a
                 fixed resolution of cells. Although BCM is advantageous
                 in balancing loads with cubes, it also has a problem of
                 efficiency and scalability for computing with
                 general-purpose supercomputers due to insufficient
                 memory bandwidth and communication overhead of an
                 interconnection network. In this paper, we present a
                 custom computing architecture for FPGA-based scalable
                 BCM computation with a dedicated network, called an
                 accelerator domain network (ADN). We design a cube
                 engine which allows bandwidth-efficient computation of
                 cubes based on streamed stencil computation of the
                 fractional-step method. Through prototype
                 implementation, we evaluate the potential performance
                 of the architecture. For ALTERA Stratix V 28nm FPGA, we
                 estimate that a single FPGA has the peak performance of
                 107 GFlop/s in a single precision.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Wang:2014:GRS,
  author =       "Tao Wang and Guangyu Sun and Jiahua Chen and Jian Gong
                 and Haoyang Wu and Xiaoguang Li and Songwu Lu and Jason
                 Cong",
  title =        "{GRT}: a Reconfigurable {SDR} Platform with High
                 Performance and Usability",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "51--56",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693724",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The importance of software-defined radio (SDR)
                 continues to increase. However, existing SDR platforms
                 become less efficient as the wireless industry moves
                 towards Gigabit WiFi. In this work, we propose a novel
                 reconfigurable SDR platform named GRT. With the help of
                 reconfigurable architecture and corresponding software
                 support, SDR designs on GRT can leverage high
                 performance of the underlying hardware and provide
                 sufficient usability, including the support for
                 efficient modular design, commodity interface, good
                 programmability, code reusability, etc. We implement an
                 802.11a/g WiFi system on GRT to evaluate its
                 performance. The results demonstrate that GRT can
                 achieve a substantial improvement in usability while
                 still satisfying the performance requirement.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Ando:2014:CSF,
author = "Yuki Ando and Masataka Ogawa and Yuya Mizoguchi and
Kouta Kumagai and Miaw Torng-Der and Shinya Honda",
title = "A Case Study of {FPGA Blokus Duo} Solver by
System-Level Design",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "4",
pages = "57--62",
month = sep,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2693714.2693725",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:35 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a case study to design a Blokus
Duo solver by using our system-level design toolkit
named SystemBuilder. We start with a modeling of the
Blokus Duo solver by C language and communication APIs
which are provided by SystemBuilder. Then, we
iteratively verified and tuned the parameters in the
solver by running the model on a general computer in
order to improve the performance of the solver.
Finally, the implementation on FPGA was automatically
generated from the model by SystemBuilder. Despite the
FPGA implementation, we have never written hardware
description language throughout the case study. The
case study demonstrates the easiness to design system
on FPGA by System-level design tools.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '14 conference proceedings.",
}
@Article{Joldes:2014:SSH,
  author          = {Mioara Joldes and Valentina Popescu and Warwick
                     Tucker},
  title           = {Searching for Sinks for the {H{\'e}non} Map using a
                     Multiple-precision {GPU} Arithmetic Library},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {4},
  pages           = {63--68},
  month           = sep,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2693714.2693726},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:35 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Today, GPUs represent an important hardware
                     development platform for many problems in dynamical
                     systems, where massive parallel computations are
                     needed. Beside that, many numerical studies of chaotic
                     dynamical systems require a computing precision higher
                     than common floating point (FP) formats. One such
                     application is locating invariant sets for chaotic
                     dynamical systems. In particular, we focus on
                     rigorously proving the existence of stable periodic
                     orbits for the H{\'e}non map for parameter values close
                     to the classical ones. For that, we present a
                     multiple-precision floating-point arithmetic library in
                     CUDA programming language for the NVIDIA GPU platform.
                     Our library extends the precision using so-called FP
                     expansions, where a number is represented as the
                     unevaluated sum of standard machine precision FP
                     numbers. This format offers the advantage of using
                     directly available and highly optimized hardware FP
                     operations. We generalize algorithms used by
                     multiple-precisions libraries such as Bailey's QD, or
                     the analogue GPU version, GQD.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {HEART '14 conference proceedings.},
}
@Article{Soejima:2014:MPF,
  author          = {Rie Soejima and Koji Okina and Keisuke Dohi and
                     Yuichiro Shibata and Kiyoshi Oguri},
  title           = {A Memory Profiling Framework for Stencil Computation
                     on an {FPGA} Accelerator with High Level Synthesis},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {4},
  pages           = {69--74},
  month           = sep,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2693714.2693727},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:35 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {In this paper, we propose a framework to assist memory
                     access optimization for stencil computation on an FPGA
                     accelerator. Since the stencil computations such as
                     scientific simulations need large amounts of data,
                     efficient memory access is a key to achieving high
                     performance on FPGA accelerators. Therefore, we
                     implemented a stencil computation framework with a
                     memory performance profiler on MaxCompiler, which is
                     one of high level synthesis systems. The memory
                     profiler enables us to measure clock cycles for various
                     memory controller states; data transfer, stall, and
                     idle. We also implemented simple stencil computations
                     and practical FDTD electromagnetic field simulations on
                     top of the framework with various parameters to
                     evaluate and analyze memory performance. As a result of
                     execution experiments of the simple stencil
                     computations on a MAX34245A Data Flow Engine, it was
                     demonstrated that approximately 70\% of the peak memory
                     performance could be achieved for various stencil
                     types. On the other hand, the FDTD simulations, which
                     need many data streams, could not hit this memory
                     performance saturation point, because of increasing
                     complexity of memory controller modules. Through the
                     analysis of evaluation results obtained by our memory
                     performance profiling framework, a promising memory
                     access optimization approach for stencil computations
                     in which the complexity of the memory controller is
                     traded off against data access traffic is suggested.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {HEART '14 conference proceedings.},
}
@Article{Morishima:2014:PEG,
  author          = {Shin Morishima and Hiroki Matsutani},
  title           = {Performance Evaluations of Graph Database using {CUDA}
                     and {OpenMP} Compatible Libraries},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {4},
  pages           = {75--80},
  month           = sep,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2693714.2693728},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:35 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Graph databases use graph structures to store data
                     sets as nodes, edges, and properties. They are used to
                     store and search the relationships between a large
                     number of nodes, such as social networking services and
                     recommendation engines that use customer social
                     graphs. Since computation cost for graph search queries
                     increases as the graph becomes large, in this paper we
                     accelerate the graph search functions (Dijkstra and A*
                     algorithms) of a graph database Neo4j using two ways:
                     multi-threaded library and CUDA library for graphics
                     processing units (GPUs). We use 100,000-node graphs
                     generated based on a degree distribution of Facebook
                     social graph for evaluations. Our multi-threaded and
                     GPU-based implementations require an auxiliary
                     adjacency matrix for a target graph. The results show
                     that, when we do not take into account additional
                     overhead to generate the auxiliary adjacency matrix,
                     multi-threaded version improves the Dijkstra and A*
                     search performance by 16.2x and 13.8x compared to the
                     original implementation. The GPU-based implementation
                     improves the Dijkstra and A* search performance by
                     26.2x and 32.8x. When we take into account the
                     overhead, although the speed-ups by our implementations
                     are reduced, by reusing the auxiliary adjacency matrix
                     for multiple graph search queries we can significantly
                     improve the graph search performance.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {HEART '14 conference proceedings.},
}
@Article{Mitsuishi:2014:ABF,
author = "Takuji Mitsuishi and Shimpei Nomura and Jun Suzuki and
Yuki Hayashi and Masaki Kan and Hideharu Amano",
title = "Accelerating Breadth First Search on {GPU--BOX}",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "4",
pages = "81--86",
month = sep,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2693714.2693729",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:35 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "The graph analysis has been applied in various fields
related to big-data processing and actively researched
in recent years. For processing a larger scale of
graph, parallel computing with multi-GPU system is paid
attention as an economical solution. Here, an efficient
parallel method is proposed to solve a typical graph
analysis, Breadth First Search (BFS) for multi-GPU
systems. Our target system is GPU-BOX, a prototype of
multi-GPU system using ExpEther which is a
virtualization technology based on PCI Express and
Ethernet. Although many vertices between GPUs must be
exchanged to run BFS on multi-GPU system, GPU-BOX
provides only small communication performance because
of using Ethernet. Our parallel algorithm for BFS is
designed so as to reduce the traffic between GPUs as
possible. The proposed method reduced 30--40\% traffic
between GPUs and improved the traditional parallel
method by 10\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '14 conference proceedings.",
}
@Article{Nunez-Yanez:2014:EER,
author = "Jose Nunez-Yanez",
title = "Energy Efficient Reconfigurable Computing with
Adaptive Voltage and Logic Scaling",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "4",
pages = "87--92",
month = sep,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2693714.2693730",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:35 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper investigates a novel energy-proportional
concept that combines closed-loop voltage scalability
and run-time hardware reconfiguration. Voltage scaling
is based on in-situ detectors that allow the device to
detect valid working voltage and frequency pairs at
run-time. The combined approach named AVLS (Adaptive
Voltage and Logic Scaling) enables the adaptation of
capacitance, voltage and frequency to obtain power and
energy savings based on workload, process and operating
conditions in a closed-loop configuration. The
technique is applied to a reconfigurable motion
estimation processor that can be configured with a
variable number of execution units and it is used as a
test vehicle. The results demonstrate that the proposed
voltage scaling can obtain up to 85\% reduction in
energy compared with nominal voltage operation at the
same frequency. This efficient energy point is obtained
at a voltage of 0.62 V and frequency of 56 MHz compared
with running the core at the same frequency and nominal
1 V. The addition of logic scalability means that if
enough device resources are available a parallel
configuration with six execution units operating at
0.62 V reduces energy by up to 95\% compared with a
single execution unit operating at 1 V and the same
frequency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '14 conference proceedings.",
}
@Article{Thorson:2014:INb,
  author          = {Mark Thorson},
  title           = {{Internet} Nuggets},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {42},
  number          = {4},
  pages           = {93--101},
  month           = sep,
  year            = {2014},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2693714.2693732},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:35 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {HEART '14 conference proceedings.},
}
@Article{Thorson:2014:INc,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "42",
number = "4",
pages = "93--101",
month = sep,
year = "2014",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2693714.2693732",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Dec 3 16:18:50 MST 2014",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '14 conference proceedings.",
xxnote = "Same article (identical DOI, volume, number, and
pages) as entry Thorson:2014:INb; this key is retained
so that existing citations of it still resolve.",
}
@Article{Ozturk:2015:ASC,
  author          = {Ozcan Ozturk},
  title           = {Architectural Support for Cyber-Physical Systems},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {43},
  number          = {1},
  pages           = {1--1},
  month           = mar,
  year            = {2015},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2786763.2694375},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:38 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Cyber-physical systems are integrations of
                     computation, communication networks, and physical
                     dynamics. Although time plays a central role in the
                     physical world, all widely used software abstractions
                     lack temporal semantics. The notion of correct
                     execution of a program written in every widely-used
                     programming language today does not depend on the
                     temporal behavior of the program. But temporal behavior
                     matters in almost all systems, and most particularly in
                     cyber-physical systems. In this talk, I will argue that
                     time can and must become part of the semantics of
                     programs for a large class of applications. To
                     illustrate that this is both practical and useful, we
                     will describe a recent effort at Berkeley in the design
                     and implementation of timing-centric software systems.
                     Specifically, I will describe PRET machines, which
                     redefine the instruction-set architecture (ISA) of a
                     microprocessor to embrace temporal semantics. Such
                     machines can be used in high-confidence and
                     safety-critical systems, in energy-constrained systems,
                     in mixed-criticality systems, and as a Real-Time Unit
                     (RTU) that cooperates with a general-purpose processor
                     to provide real-time services, in a manner similar to
                     how a GPU provides graphics services.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'15 conference proceedings.},
}
@Article{Zhang:2015:MRH,
author = "Yiying Zhang and Jian Yang and Amirsaman Memaripour
and Steven Swanson",
title = "{Mojim}: a Reliable and Highly-Available Non-Volatile
Memory System",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "3--18",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694370",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Next-generation non-volatile memories (NVMs) promise
DRAM-like performance, persistence, and high density.
They can attach directly to processors to form
non-volatile main memory (NVMM) and offer the
opportunity to build very low-latency storage systems.
These high-performance storage systems would be
especially useful in large-scale data center
environments where reliability and availability are
critical. However, providing reliability and
availability to NVMM is challenging, since the latency
of data replication can overwhelm the low latency that
NVMM should provide. We propose Mojim, a system that
provides the reliability and availability that
large-scale storage systems require, while preserving
the performance of NVMM. Mojim achieves these goals by
using a two-tier architecture in which the primary tier
contains a mirrored pair of nodes and the secondary
tier contains one or more secondary backup nodes with
weakly consistent copies of data. Mojim uses
highly-optimized replication protocols, software, and
networking stacks to minimize replication costs and
expose as much of NVMM's performance as possible. We
evaluate Mojim using raw DRAM as a proxy for NVMM and
using an industrial NVMM emulation system. We find that
Mojim provides replicated NVMM with similar or even
better performance than un-replicated NVMM (reducing
latency by 27\% to 63\% and delivering between 0.4 to
2.7X the throughput). We demonstrate that replacing
MongoDB's built-in replication system with Mojim
improves MongoDB's performance by 3.4 to 4X.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Wang:2015:SPC,
author = "Rujia Wang and Lei Jiang and Youtao Zhang and Jun
Yang",
title = "{SD--PCM}: Constructing Reliable Super Dense Phase
Change Memory under Write Disturbance",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "19--31",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694352",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Phase Change Memory (PCM) has better scalability and
smaller cell size comparing to DRAM. However, further
scaling PCM cell in deep sub-micron regime results in
significant thermal based write disturbance (WD).
Naively allocating large inter-cell space increases
cell size from 4F$^2$ ideal to 12F$^2$. While a recent
work mitigates WD along word-lines through disturbance
resilient data encoding, it is ineffective for WD along
bit-lines, which is more severe due to widely adopted $
\mu $Trench structure in constructing PCM cell arrays.
Without mitigating WD along bit-lines, a PCM cell still
has 8F$^2$, which is 100\% larger than the ideal. In this
paper, we propose SD-PCM for achieving reliable write
operations in super dense PCM. In particular, we focus
on mitigating WD along bit-lines such that we can
construct super dense PCM chips with 4F$^2$ cell size,
i.e., the minimal for diode-switch based PCM. Based on
simple verification-n-correction (VnC), we propose
LazyCorrection and PreRead to effectively reduce VnC
overhead and minimize cascading verification during
write. We further propose (n:m)-Alloc for achieving
good tradeoff between VnC overhead minimization and
memory capacity loss. Our experimental results show
that, comparing to a WD-free low density PCM, SD-PCM
achieves 80\% capacity improvement in cell arrays while
incurring around 0--10\% performance degradation when
using different (n:m) allocators.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Young:2015:DWE,
  author          = {Vinson Young and Prashant J. Nair and Moinuddin K.
                     Qureshi},
  title           = {{DEUCE}: Write-Efficient Encryption for Non-Volatile
                     Memories},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {43},
  number          = {1},
  pages           = {33--44},
  month           = mar,
  year            = {2015},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2786763.2694387},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:38 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Phase Change Memory (PCM) is an emerging Non Volatile
                     Memory (NVM) technology that has the potential to
                     provide scalable high-density memory systems. While the
                     non-volatility of PCM is a desirable property in order
                     to save leakage power, it also has the undesirable
                     effect of making PCM main memories susceptible to newer
                     modes of security vulnerabilities, for example,
                     accessibility to sensitive data if a PCM DIMM gets
                     stolen. PCM memories can be made secure by encrypting
                     the data. Unfortunately, such encryption comes with a
                     significant overhead in terms of bits written to PCM
                     memory, causing half of the bits in the line to change
                     on every write, even if the actual number of bits being
                     written to memory is small. Our studies show that a
                     typical writeback modifies, on average, only 12\% of
                     the bits in the cacheline. Thus, encryption causes
                     almost a 4x increase in the number of bits written to
                     PCM memories. Such extraneous bit writes cause
                     significant increase in write power, reduction in write
                     endurance, and reduction in write bandwidth. To provide
                     the benefit of secure memory in a write efficient
                     manner this paper proposes Dual Counter Encryption
                     (DEUCE). DEUCE is based on the observation that a
                     typical writeback only changes a few words, so DEUCE
                     reencrypts only the words that have changed. We show
                     that DEUCE reduces the number of modified bits per
                     writeback for a secure memory from 50\% to 24\%, which
                     improves performance by 27\% and increases lifetime by
                     2x.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'15 conference proceedings.},
}
@Article{Morrison:2015:TBT,
  author          = {Adam Morrison and Yehuda Afek},
  title           = {Temporally Bounding {TSO} for Fence-Free Asymmetric
                     Synchronization},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {43},
  number          = {1},
  pages           = {45--58},
  month           = mar,
  year            = {2015},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2786763.2694374},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:38 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {This paper introduces a temporally bounded total store
                     ordering (TBTSO) memory model, and shows that it
                     enables nonblocking fence-free solutions to asymmetric
                     synchronization problems, such as those arising in
                     memory reclamation and biased locking. TBTSO
                     strengthens the TSO memory model by bounding the time
                     it takes a store to drain from the store buffer into
                     memory. This bound enables devising fence-free
                     algorithms for asymmetric problems, which require a
                     performance-critical fast path to synchronize with an
                     infrequently executed slow path. We demonstrate this by
                     constructing (1) a fence-free version of the hazard
                     pointers memory reclamation scheme, and (2) a
                     fence-free biased lock algorithm which is compatible
                     with unmanaged environments as it does not rely on safe
                     points or similar mechanisms. We further argue that
                     TBTSO can be implemented in hardware with modest
                     modifications to existing TSO architectures. However,
                     our design makes assumptions about proprietary
                     implementation details of commercial hardware; it thus
                     best serves as a starting point for a discussion on the
                     feasibility of hardware TBTSO implementation. We also
                     show how minimal OS support enables the adaptation of
                     TBTSO algorithms to x86 systems.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'15 conference proceedings.},
}
@Article{Matveev:2015:RHN,
  author          = {Alexander Matveev and Nir Shavit},
  title           = {Reduced Hardware {NOrec}: a Safe and Scalable Hybrid
                     Transactional Memory},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {43},
  number          = {1},
  pages           = {59--71},
  month           = mar,
  year            = {2015},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2786763.2694393},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:38 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Because of hardware TM limitations, software fallbacks
                     are the only way to make TM algorithms guarantee
                     progress. Nevertheless, all known software fallbacks to
                     date, from simple locks to sophisticated versions of
                     the NOrec Hybrid TM algorithm, have either limited
                     scalability or weakened semantics. We propose a novel
                     reduced-hardware (RH) version of the NOrec HyTM
                     algorithm. Instead of an all-software slow path, in our
                     RH NOrec the slow-path is a ``mix'' of hardware and
                     software: one short hardware transaction executes a
                     maximal amount of initial reads in the hardware, and
                     the second executes all of the writes. This novel
                     combination of the RH approach and the NOrec algorithm
                     delivers the first Hybrid TM that scales while fully
                     preserving the hardware's original semantics of opacity
                     and privatization. Our GCC implementation of RH NOrec
                     is promising in that it shows improved performance
                     relative to all prior methods, at the concurrency
                     levels we could test today.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'15 conference proceedings.},
}
@Article{Orr:2015:SUR,
  author          = {Marc S. Orr and Shuai Che and Ayse Yilmazer and
                     Bradford M. Beckmann and Mark D. Hill and David A.
                     Wood},
  title           = {Synchronization Using Remote-Scope Promotion},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {43},
  number          = {1},
  pages           = {73--86},
  month           = mar,
  year            = {2015},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/2786763.2694350},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Wed Jun 3 11:27:38 MDT 2015},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Heterogeneous system architecture (HSA) and OpenCL
                     define scoped synchronization to facilitate low
                     overhead communication across a subset of threads.
                     Scoped synchronization works well for static sharing
                     patterns, where consumer threads are known a priori. It
                     works poorly for dynamic sharing patterns (e.g., work
                     stealing) where programmers cannot use a faster small
                     scope due to the rare possibility that the work is
                     stolen by a thread in a distant slower scope. This puts
                     programmers in a conundrum: optimize the common case by
                     synchronizing at a faster small scope or use work
                     stealing at a slower large scope. In this paper, we
                     propose to extend scoped synchronization with
                     remote-scope promotion. This allows the most frequent
                     sharers to synchronize through a small scope.
                     Infrequent sharers synchronize by promoting that remote
                     small scope to a larger shared scope. Synchronization
                     using remote-scope promotion provides performance
                     robustness for dynamic workloads, where the benefits
                     provided by scoped synchronization and work stealing
                     are hard to anticipate. Compared to a na{\"\i}ve
                     baseline, static scoped synchronization alone achieves
                     a 1.07x speedup on average and dynamic work stealing
                     alone achieves a 1.18x speedup on average. In contrast,
                     synchronization using remote-scope promotion achieves a
                     robust 1.25x speedup on average, across a diverse set
                     of graph benchmarks and inputs.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'15 conference proceedings.},
}
@Article{Liu:2015:GHS,
author = "Chang Liu and Austin Harris and Martin Maas and
Michael Hicks and Mohit Tiwari and Elaine Shi",
title = "{GhostRider}: a Hardware-Software System for Memory
Trace Oblivious Computation",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "87--101",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694385",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a new, co-designed compiler and
architecture called GhostRider for supporting privacy
preserving computation in the cloud. GhostRider ensures
all programs satisfy a property called memory-trace
obliviousness (MTO): Even an adversary that observes
memory, bus traffic, and access times while the program
executes can learn nothing about the program's
sensitive inputs and outputs. One way to achieve MTO is
to employ Oblivious RAM (ORAM), allocating all code and
data in a single ORAM bank, and to also disable caches
or fix the rate of memory traffic. This baseline
approach can be inefficient, and so GhostRider's
compiler uses a program analysis to do better,
allocating data to non-oblivious, encrypted RAM (ERAM)
and employing a scratchpad when doing so will not
compromise MTO. The compiler can also allocate to
multiple ORAM banks, which sometimes significantly
reduces access times. We have formalized our approach
and proved it enjoys MTO. Our FPGA-based hardware
prototype and simulation results show that GhostRider
significantly outperforms the baseline strategy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Fletcher:2015:FON,
author = "Christopher W. Fletcher and Ling Ren and Albert Kwon
and Marten van Dijk and Srinivas Devadas",
title = "Freecursive {ORAM}: [Nearly] Free Recursion and
Integrity Verification for Position-based Oblivious
{RAM}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "103--116",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694353",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Oblivious RAM (ORAM) is a cryptographic primitive that
hides memory access patterns as seen by untrusted
storage. Recently, ORAM has been architected into
secure processors. A big challenge for hardware ORAM
schemes is how to efficiently manage the Position Map
(PosMap), a central component in modern ORAM
algorithms. Implemented naively, the PosMap causes ORAM
to be fundamentally unscalable in terms of on-chip
area. On the other hand, a technique called Recursive
ORAM fixes the area problem yet significantly increases
ORAM's performance overhead. To address this challenge,
we propose three new mechanisms. We propose a new ORAM
structure called the PosMap Lookaside Buffer (PLB) and
PosMap compression techniques to reduce the performance
overhead from Recursive ORAM empirically (the latter
also improves the construction asymptotically). Through
simulation, we show that these techniques reduce the
memory bandwidth overhead needed to support recursion
by 95\%, reduce overall ORAM bandwidth by 37\% and
improve overall SPEC benchmark performance by 1.27x. We
then show how our PosMap compression techniques further
facilitate an extremely efficient integrity
verification scheme for ORAM which we call PosMap MAC
(PMMAC). For a practical parameterization, PMMAC
reduces the amount of hashing needed for integrity
checking by $ \geq 68 \times $ relative to prior
schemes and introduces only 7\% performance overhead.
We prototype our mechanisms in hardware and report area
and clock frequency for a complete ORAM design
post-synthesis and post-layout using an ASIC flow in a
32~nm commercial process. With 2 DRAM channels, the
design post-layout runs at 1~GHz and has a total area
of .47~mm$^2$. Depending on PLB-specific parameters, the
PLB accounts for 10\% to 26\% area. PMMAC costs 12\% of
total design area. Our work is the first to prototype
Recursive ORAM or ORAM with any integrity scheme in
hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Chisnall:2015:BPA,
author = "David Chisnall and Colin Rothwell and Robert N. M.
Watson and Jonathan Woodruff and Munraj Vadera and
Simon W. Moore and Michael Roe and Brooks Davis and
Peter G. Neumann",
title = "Beyond the {PDP-11}: Architectural Support for a
Memory-Safe {C} Abstract Machine",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "117--130",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694367",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose a new memory-safe interpretation of the C
abstract machine that provides stronger protection to
benefit security and debugging. Despite ambiguities in
the specification intended to provide implementation
flexibility, contemporary implementations of C have
converged on a memory model similar to the PDP-11, the
original target for C. This model lacks support for
memory safety despite well-documented impacts on
security and reliability. Attempts to change this model
are often hampered by assumptions embedded in a large
body of existing C code, dating back to the memory
model exposed by the original C compiler for the
PDP-11. Our experience with attempting to implement a
memory-safe variant of C on the CHERI experimental
microprocessor led us to identify a number of
problematic idioms. We describe these as well as their
interaction with existing memory safety schemes and the
assumptions that they make beyond the requirements of
the C specification. Finally, we refine the CHERI ISA
and abstract model for C, by combining elements of the
CHERI capability model and fat pointers, and present a
softcore CPU that implements a C abstract machine that
can run legacy C code with strong memory protection
guarantees.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Ma:2015:SDS,
author = "Jiuyue Ma and Xiufeng Sui and Ninghui Sun and Yupeng
Li and Zihao Yu and Bowen Huang and Tianni Xu and
Zhicheng Yao and Yun Chen and Haibin Wang and Lixin
Zhang and Yungang Bao",
title = "Supporting Differentiated Services in Computers via
Programmable Architecture for Resourcing-on-Demand
{(PARD)}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "131--143",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694382",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents PARD, a programmable architecture
for resourcing-on-demand that provides a new
programming interface to convey an application's
high-level information like quality-of-service
requirements to the hardware. PARD enables new
functionalities like fully hardware-supported
virtualization and differentiated services in
computers. PARD is inspired by the observation that a
computer is inherently a network in which hardware
components communicate via packets (e.g., over the NoC
or PCIe). We apply principles of software-defined
networking to this intra-computer network and address
three major challenges. First, to deal with the
semantic gap between high-level applications and
underlying hardware packets, PARD attaches a high-level
semantic tag (e.g., a virtual machine or thread ID) to
each memory-access, I/O, or interrupt packet. Second,
to make hardware components more manageable, PARD
implements programmable control planes that can be
integrated into various shared resources (e.g., cache,
DRAM, and I/O devices) and can differentially process
packets according to tag-based rules. Third, to
facilitate programming, PARD abstracts all control
planes as a device file tree to provide a uniform
programming interface via which users create and apply
tag-based rules. Full-system simulation results show
that by co-locating latency-critical memcached
applications with other workloads PARD can improve a
four-core computer's CPU utilization by up to a factor
of four without significantly increasing tail latency.
FPGA emulation based on a preliminary RTL
implementation demonstrates that the cache control
plane introduces no extra latency and that the memory
control plane can reduce queueing delay for
high-priority memory-access requests by up to a factor
of 5.6.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Omote:2015:IAE,
author = "Yushi Omote and Takahiro Shinagawa and Kazuhiko Kato",
title = "Improving Agility and Elasticity in Bare-metal
Clouds",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "145--159",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694349",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Bare-metal clouds are an emerging
infrastructure-as-a-service (IaaS) that leases physical
machines (bare-metal instances) rather than virtual
machines, allowing resource-intensive applications to
have exclusive access to physical hardware.
Unfortunately, bare-metal instances require
time-consuming or OS-specific tasks for deployment due
to the lack of virtualization layers, thereby
sacrificing several beneficial features of traditional
IaaS clouds such as agility, elasticity, and OS
transparency. We present BMcast, an OS deployment
system with a special-purpose de-virtualizable virtual
machine monitor (VMM) that supports quick and
OS-transparent startup of bare-metal instances. BMcast
performs streaming OS deployment while allowing direct
access to physical hardware from the guest OS, and then
disappears after completing the deployment. Quick
startup of instances improves agility and elasticity
significantly, and OS transparency greatly simplifies
management tasks for cloud customers. Experimental
results have confirmed that BMcast initiated a
bare-metal instance 8.6 times faster than image
copying, and database performance on BMcast during
streaming OS deployment was comparable to that on a
state-of-the-art VMM without performing deployment.
BMcast incurred zero overhead after
de-virtualization.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Haque:2015:FMI,
author = "Md E. Haque and Yong {hun} Eom and Yuxiong He and Sameh
Elnikety and Ricardo Bianchini and Kathryn S.
McKinley",
title = "Few-to-Many: Incremental Parallelism for Reducing Tail
Latency in Interactive Services",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "161--175",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694384",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Interactive services, such as Web search,
recommendations, games, and finance, must respond
quickly to satisfy customers. Achieving this goal
requires optimizing tail (e.g., 99th+ percentile)
latency. Although every server is multicore,
parallelizing individual requests to reduce tail
latency is challenging because (1) service demand is
unknown when requests arrive; (2) blindly parallelizing
all requests quickly oversubscribes hardware resources;
and (3) parallelizing the numerous short requests will
not improve tail latency. This paper introduces
Few-to-Many (FM) incremental parallelization, which
dynamically increases parallelism to reduce tail
latency. FM uses request service demand profiles and
hardware parallelism in an offline phase to compute a
policy, represented as an interval table, which
specifies when and how much software parallelism to
add. At runtime, FM adds parallelism as specified by
the interval table indexed by dynamic system load and
request execution time progress. The longer a request
executes, the more parallelism FM adds. We evaluate FM
in Lucene, an open-source enterprise search engine, and
in Bing, a commercial Web search engine. FM improves
the 99th percentile response time up to 32\% in Lucene
and up to 26\% in Bing, compared to prior
state-of-the-art parallelization. Compared to running
requests sequentially in Bing, FM improves tail latency
by a factor of two. These results illustrate that
incremental parallelism is a powerful tool for reducing
tail latency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Colp:2015:PDS,
author = "Patrick Colp and Jiawen Zhang and James Gleeson and
Sahil Suneja and Eyal de Lara and Himanshu Raj and
Stefan Saroiu and Alec Wolman",
title = "Protecting Data on {Smartphones} and Tablets from
Memory Attacks",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "177--189",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694380",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Smartphones and tablets are easily lost or stolen.
This makes them susceptible to an inexpensive class of
memory attacks, such as cold-boot attacks, using a bus
monitor to observe the memory bus, and DMA attacks.
This paper describes Sentry, a system that allows
applications and OS components to store their code and
data on the System-on-Chip (SoC) rather than in DRAM.
We use ARM-specific mechanisms originally designed for
embedded systems, but still present in today's mobile
devices, to protect applications and OS subsystems from
memory attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Dautenhahn:2015:NKO,
author = "Nathan Dautenhahn and Theodoros Kasampalis and Will
Dietz and John Criswell and Vikram Adve",
title = "Nested Kernel: an Operating System Architecture for
Intra-Kernel Privilege Separation",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "191--206",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694386",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Monolithic operating system designs undermine the
security of computing systems by allowing single
exploits anywhere in the kernel to enjoy full
supervisor privilege. The nested kernel operating
system architecture addresses this problem by
``nesting'' a small isolated kernel within a
traditional monolithic kernel. The ``nested kernel''
interposes on all updates to virtual memory
translations to assert protections on physical memory,
thus significantly reducing the trusted computing base
for memory access control enforcement. We incorporated
the nested kernel architecture into FreeBSD on x86-64
hardware while allowing the entire operating system,
including untrusted components, to operate at the
highest hardware privilege level by write-protecting
MMU translations and de-privileging the untrusted part
of the kernel. Our implementation inherently enforces
kernel code integrity while still allowing dynamically
loaded kernel modules, thus defending against code
injection attacks. We also demonstrate that the nested
kernel architecture allows kernel developers to isolate
memory in ways not possible in monolithic kernels by
introducing write-mediation and write-logging services
to protect critical system data structures. Performance
of the nested kernel prototype shows modest overheads:
$< 1\%$ average for Apache and 2.7\% for kernel
compile. Overall, our results and experience show that
the nested kernel design can be retrofitted to existing
monolithic kernels, providing important security
benefits.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Tan:2015:DWS,
author = "Zhangxi Tan and Zhenghao Qian and Xi Chen and Krste
Asanovic and David Patterson",
title = "{DIABLO}: a Warehouse-Scale Computer Network Simulator
using {FPGAs}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "207--221",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694362",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Motivated by rapid software and hardware innovation in
warehouse-scale computing (WSC), we visit the problem
of warehouse-scale network design evaluation. A WSC is
composed of about 30 arrays or clusters, each of which
contains about 3000 servers, leading to a total of
about 100,000 servers per WSC. We found many prior
experiments have been conducted on relatively small
physical testbeds, and they often assume the workload
is static and that computations are only loosely
coupled with the adaptive networking stack. We present
a novel and cost-efficient FPGA-based evaluation
methodology, called Datacenter-In-A-Box at LOw cost
(DIABLO), which treats arrays as whole computers with
tightly integrated hardware and software. We have built
a 3,000-node prototype running the full WSC software
stack. Using our prototype, we have successfully
reproduced a few WSC phenomena, such as TCP Incast and
memcached request latency long tail, and found that
results do indeed change with both scale and with
version of the full software stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Hauswald:2015:SOE,
author = "Johann Hauswald and Michael A. Laurenzano and Yunqi
Zhang and Cheng Li and Austin Rovinski and Arjun
Khurana and Ronald G. Dreslinski and Trevor Mudge and
Vinicius Petrucci and Lingjia Tang and Jason Mars",
title = "{Sirius}: an Open End-to-End Voice and Vision Personal
Assistant and Its Implications for Future Warehouse
Scale Computers",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "223--238",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694347",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As user demand scales for intelligent personal
assistants (IPAs) such as Apple's Siri, Google's Google
Now, and Microsoft's Cortana, we are approaching the
computational limits of current datacenter
architectures. It is an open question how future server
architectures should evolve to enable this emerging
class of applications, and the lack of an open-source
IPA workload is an obstacle in addressing this
question. In this paper, we present the design of
Sirius, an open end-to-end IPA web-service application
that accepts queries in the form of voice and images,
and responds with natural language. We then use this
workload to investigate the implications of four points
in the design space of future accelerator-based server
architectures spanning traditional CPUs, GPUs, manycore
throughput co-processors, and FPGAs. To investigate
future server designs for Sirius, we decompose Sirius
into a suite of 7 benchmarks (Sirius Suite) comprising
the computationally intensive bottlenecks of Sirius. We
port Sirius Suite to a spectrum of accelerator
platforms and use the performance and power trade-offs
across these platforms to perform a total cost of
ownership (TCO) analysis of various server design
points. In our study, we find that accelerators are
critical for the future scalability of IPA services.
Our results show that GPU- and FPGA-accelerated servers
improve the query latency on average by 10x and 16x.
For a given throughput, GPU- and FPGA-accelerated
servers can reduce the TCO of datacenters by 2.6x and
1.4x, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Xu:2015:ALD,
author = "Chao Xu and Felix Xiaozhu Lin and Yuyang Wang and Lin
Zhong",
title = "Automated {OS}-level Device Runtime Power Management",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "239--252",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694360",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Non-CPU devices on a modern system-on-a-chip (SoC),
ranging from accelerators to I/O controllers, account
for a significant portion of the chip area. It is
therefore vital for system energy efficiency that idle
devices can enter a low-power state while still meeting
the performance expectation. This is called device
runtime Power Management (PM) for which individual
device drivers in commodity OSes are held responsible
today. Based on the observations of existing drivers
and their evolution, we consider it harmful to rely on
drivers for device runtime PM. This paper identifies
three pieces of information as essential to device
runtime PM, and shows that they can be obtained without
involving drivers, either by using a software-only
approach, or more efficiently, by adding one register
bit to each device. We thus suggest a structural change
to the current Linux runtime PM framework, replacing
the PM code in all applicable drivers with a single
kernel module called the central PM agent. Experimental
evaluations show that the central PM agent is just as
effective as hand-tuned driver PM code. The paper also
presents a tool called PowerAdvisor that simplifies
driver PM efforts under the current Linux runtime PM
framework. PowerAdvisor analyzes execution traces and
suggests where to insert PM calls in driver source
code. Despite being a best-effort tool, PowerAdvisor
not only reproduces hand-tuned PM code from stock
drivers, but also correctly suggests PM code never
known before. Overall, our experience shows that it is
promising to ultimately free driver developers from
manual PM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Goiri:2015:CTV,
author = "{\'I}{\~n}igo Goiri and Thu D. Nguyen and Ricardo
Bianchini",
title = "{CoolAir}: Temperature- and Variation-Aware Management
for Free-Cooled Datacenters",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "253--265",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694378",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Despite its benefits, free cooling may expose servers
to high absolute temperatures, wide temperature
variations, and high humidity when datacenters are
sited at certain locations. Prior research (in
non-free-cooled datacenters) has shown that high
temperatures and/or wide temporal temperature
variations can harm hardware reliability. In this
paper, we identify the runtime management strategies
required to limit absolute temperatures, temperature
variations, humidity, and cooling energy in free-cooled
datacenters. As the basis for our study, we propose
CoolAir, a system that embodies these strategies. Using
CoolAir and a real free-cooled datacenter prototype, we
show that effective management requires cooling
infrastructures that can act smoothly. In addition, we
show that CoolAir can tightly manage temperature and
significantly reduce temperature variation, often at a
lower cooling cost than existing free-cooled
datacenters. Perhaps most importantly, based on our
results, we derive several principles and lessons that
should guide the design of management systems for
free-cooled datacenters of any size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Mishra:2015:PGM,
author = "Nikita Mishra and Huazhe Zhang and John D. Lafferty
and Henry Hoffmann",
title = "A Probabilistic Graphical Model-based Approach for
Minimizing Energy Under Performance Constraints",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "267--281",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694373",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In many deployments, computer systems are
underutilized --- meaning that applications have
performance requirements that demand less than full
system capacity. Ideally, we would take advantage of
this under-utilization by allocating system resources
so that the performance requirements are met and energy
is minimized. This optimization problem is complicated
by the fact that the performance and power consumption
of various system configurations are often application
--- or even input --- dependent. Thus, practically,
minimizing energy for a performance constraint requires
fast, accurate estimations of application-dependent
performance and power tradeoffs. This paper
investigates machine learning techniques that enable
energy savings by learning Pareto-optimal power and
performance tradeoffs. Specifically, we propose LEO, a
probabilistic graphical model-based learning system
that provides accurate online estimates of an
application's power and performance as a function of
system configuration. We compare LEO to (1) offline
learning, (2) online learning, (3) a heuristic
approach, and (4) the true optimal solution. We find
that LEO produces the most accurate estimates and near
optimal energy savings.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Pang:2015:MLL,
author = "Jun Pang and Chris Dwyer and Alvin R. Lebeck",
title = "More is Less, Less is More: Molecular-Scale Photonic
{NoC} Power Topologies",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "283--296",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694377",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Molecular-scale Network-on-Chip (mNoC) crossbars use
quantum dot LEDs as an on-chip light source, and
chromophores to provide optical signal filtering for
receivers. An mNoC reduces power consumption or enables
scaling to larger crossbars for a reduced energy budget
compared to current nanophotonic NoC crossbars. Since
communication latency is reduced by using a high-radix
crossbar, minimizing power consumption becomes a
primary design target. Conventional Single Writer
Multiple Reader (SWMR) photonic crossbar designs
broadcast all packets, and incur the commensurate
required power, even if only two nodes are
communicating. This paper introduces power topologies,
enabled by unique capabilities of mNoC technology, to
reduce overall interconnect power consumption. A power
topology corresponds to the logical connectivity
provided by a given power mode. Broadcast is one power
mode and it consumes the maximum power. Additional
power modes consume less power but allow a source to
communicate with only a statically defined, potentially
non-contiguous, subset of nodes. Overall interconnect
power is reduced if the more frequently communicating
nodes use modes that consume less power, while less
frequently communicating nodes use modes that consume
more power. We also investigate thread mapping
techniques to fully exploit power topologies. We
explore various mNoC power topologies with one, two and
four power modes for a radix-256 SWMR mNoC crossbar.
Our results show that the combination of power
topologies and intelligent thread mapping can reduce
total mNoC power by up to 51\% on average for a set of
12 SPLASH benchmarks. Furthermore performance is 10\%
better than conventional resonator-based photonic NoCs
and energy is reduced by 72\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Sridharan:2015:MEM,
author = "Vilas Sridharan and Nathan DeBardeleben and Sean
Blanchard and Kurt B. Ferreira and Jon Stearley and
John Shalf and Sudhanva Gurumurthi",
title = "Memory Errors in Modern Systems: The Good, The Bad,
and The Ugly",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "297--310",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694348",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Several recent publications have shown that hardware
faults in the memory subsystem are commonplace. These
faults are predicted to become more frequent in future
systems that contain orders of magnitude more DRAM and
SRAM than found in current memory subsystems. These
memory subsystems will need to provide resilience
techniques to tolerate these faults when deployed in
high-performance computing systems and data centers
containing tens of thousands of nodes. Therefore, it is
critical to understand the efficacy of current hardware
resilience techniques to determine whether they will be
suitable for future systems. In this paper, we present
a study of DRAM and SRAM faults and errors from the
field. We use data from two leadership-class
high-performance computer systems to analyze the
reliability impact of hardware resilience schemes that
are deployed in current systems. Our study has several
key findings about the efficacy of many currently
deployed reliability techniques such as DRAM ECC, DDR
address/command parity, and SRAM ECC and parity. We
also perform a methodological study, and find that
counting errors instead of faults, a common practice
among researchers and data center operators, can lead
to incorrect conclusions about system reliability.
Finally, we use our data to project the needs of future
large-scale systems. We find that SRAM faults are
unlikely to pose a significantly larger reliability
threat in the future, while DRAM faults will be a major
concern and stronger DRAM resilience schemes will be
needed to maintain acceptable failure rates similar to
those found on today's systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Yetim:2015:CMC,
author = "Yavuz Yetim and Sharad Malik and Margaret Martonosi",
title = "{CommGuard}: Mitigating Communication Errors in
Error-Prone Parallel Execution",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "311--323",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694354",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As semiconductor technology scales towards
ever-smaller transistor sizes, hardware fault rates are
increasing. Since important application classes (e.g.,
multimedia, streaming workloads) are
data-error-tolerant, recent research has proposed
techniques that seek to save energy or improve yield by
exploiting error tolerance at the
architecture/microarchitecture level. Even seemingly
error-tolerant applications, however, will crash or
hang due to control-flow/memory addressing errors. In
parallel computation, errors involving inter-thread
communication can have equally catastrophic effects.
Our work explores techniques that mitigate the impact
of potentially catastrophic errors in parallel
computation, while still garnering power, cost, or
yield benefits from data error tolerance. Our proposed
CommGuard solution uses FSM-based checkers to pad and
discard data in order to maintain semantic alignment
between program control flow and the data communicated
between processors. CommGuard techniques are low
overhead and they exploit application information
already provided by some parallel programming languages
(e.g. StreamIt). By converting potentially catastrophic
communication errors into potentially tolerable data
errors, CommGuard allows important streaming
applications like JPEG and MP3 decoding to execute
without crashing and to sustain good output quality,
even for errors as frequent as every 500~$ \mu $s.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Kim:2015:DEF,
author = "Dohyeong Kim and Yonghwi Kwon and William N. Sumner
and Xiangyu Zhang and Dongyan Xu",
title = "Dual Execution for On the Fly Fine Grained Execution
Comparison",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "325--338",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694394",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Execution comparison has many applications in
debugging, malware analysis, software feature
identification, and intrusion detection. Existing
comparison techniques have various limitations. Some
can only compare at the system event level and require
executions to take the same input. Some require storing
instruction traces that are very space-consuming and
have difficulty dealing with non-determinism. In this
paper, we propose a novel dual execution technique that
allows on-the-fly comparison at the instruction level.
Only differences between the executions are recorded.
It allows executions to proceed in a coupled mode such
that they share the same input sequence with the same
timing, reducing nondeterminism. It also allows them to
proceed in a decoupled mode such that the user can
interact with each one differently. Decoupled
executions can be recoupled to share the same future
inputs and facilitate further comparison. We have
implemented a prototype and applied it to identifying
functional components for reuse, comparative debugging
with new GDB primitives, and understanding real world
regression failures. Our results show that dual
execution is a critical enabling technique for
execution comparison.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Hosek:2015:VUE,
author = "Petr Hosek and Cristian Cadar",
title = "{VARAN} the Unbelievable: an Efficient {$N$}-version
Execution Framework",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "339--353",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694390",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the widespread availability of multi-core
processors, running multiple diversified variants or
several different versions of an application in
parallel is becoming a viable approach for increasing
the reliability and security of software systems. The
key component of such N-version execution (NVX) systems
is a runtime monitor that enables the execution of
multiple versions in parallel. Unfortunately, existing
monitors impose either a large performance overhead or
rely on intrusive kernel-level changes. Moreover, none
of the existing solutions scales well with the number
of versions, since the runtime monitor acts as a
performance bottleneck. In this paper, we introduce
Varan, an NVX framework that combines selective binary
rewriting with a novel event-streaming architecture to
significantly reduce performance overhead and scale
well with the number of versions, without relying on
intrusive kernel modifications. Our evaluation shows
that Varan can run NVX systems based on popular C10k
network servers with only a modest performance
overhead, and can be effectively used to increase
software reliability using techniques such as
transparent failover, live sanitization and
multi-revision execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Malka:2015:REI,
author = "Moshe Malka and Nadav Amit and Muli Ben-Yehuda and Dan
Tsafrir",
title = "{rIOMMU}: Efficient {IOMMU} for {I/O} Devices that
Employ Ring Buffers",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "355--368",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694355",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The IOMMU allows the OS to encapsulate I/O devices in
their own virtual memory spaces, thus restricting their
DMAs to specific memory pages. The OS uses the IOMMU to
protect itself against buggy drivers and
malicious/errant devices. But the added protection
comes at a cost, degrading the throughput of
I/O-intensive workloads by up to an order of magnitude.
This cost has motivated system designers to trade off
some safety for performance, e.g., by leaving stale
information in the IOTLB for a while so as to amortize
costly invalidations. We observe that high-bandwidth
devices---like network and PCIe SSD
controllers---interact with the OS via circular ring
buffers that induce a sequential, predictable workload.
We design a ring IOMMU (rIOMMU) that leverages this
characteristic by replacing the virtual memory page
table hierarchy with a circular, flat table. A flat
table is adequately supported by exactly one IOTLB
entry, making every new translation an implicit
invalidation of the former and thus requiring explicit
invalidations only at the end of I/O bursts. Using
standard networking benchmarks, we show that rIOMMU
provides up to 7.56x higher throughput relative to the
baseline IOMMU, and that it is within 0.77--1.00x the
throughput of a system without IOMMU protection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Liu:2015:PPM,
author = "Daofu Liu and Tianshi Chen and Shaoli Liu and Jinhong
Zhou and Shengyuan Zhou and Olivier Temam and Xiaobing
Feng and Xuehai Zhou and Yunji Chen",
title = "{PuDianNao}: a Polyvalent Machine Learning
Accelerator",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "369--381",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694358",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Machine Learning (ML) techniques are pervasive tools
in various emerging commercial applications, but have
to be accommodated by powerful computer systems to
process very large data. Although general-purpose CPUs
and GPUs have provided straightforward solutions, their
energy-efficiencies are limited due to their excessive
supports for flexibility. Hardware accelerators may
achieve better energy-efficiencies, but each
accelerator often accommodates only a single ML
technique (family). According to the famous
No-Free-Lunch theorem in the ML domain, however, an ML
technique performs well on a dataset may perform poorly
on another dataset, which implies that such accelerator
may sometimes lead to poor learning accuracy. Even if
regardless of the learning accuracy, such accelerator
can still become inapplicable simply because the
concrete ML task is altered, or the user chooses
another ML technique. In this study, we present an ML
accelerator called PuDianNao, which accommodates seven
representative ML techniques, including $k$-means,
$k$-nearest neighbors, naive Bayes, support vector
machine, linear regression, classification tree, and
deep neural network. Benefited from our thorough
analysis on computational primitives and locality
properties of different ML techniques, PuDianNao can
perform up to 1056 GOP/s (e.g., additions and
multiplications) in an area of 3.51 mm$^2$, and consumes
596 mW only. Compared with the NVIDIA K20M GPU (28nm
process), PuDianNao (65nm process) is 1.20x faster, and
can reduce the energy by 128.41x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Goiri:2015:ABA,
author = "Inigo Goiri and Ricardo Bianchini and Santosh
Nagarakatte and Thu D. Nguyen",
title = "{ApproxHadoop}: Bringing Approximations to {MapReduce}
Frameworks",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "383--397",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694351",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose and evaluate a framework for creating and
running approximation-enabled MapReduce programs.
Specifically, we propose approximation mechanisms that
fit naturally into the MapReduce paradigm, including
input data sampling, task dropping, and accepting and
running a precise and a user-defined approximate
version of the MapReduce code. We then show how to
leverage statistical theories to compute error bounds
for popular classes of MapReduce programs when
approximating with input data sampling and/or task
dropping. We implement the proposed mechanisms and
error bound estimations in a prototype system called
ApproxHadoop. Our evaluation uses MapReduce
applications from different domains, including data
analytics, scientific computing, video encoding, and
machine learning. Our results show that ApproxHadoop
can significantly reduce application execution time
and/or energy consumption when the user is willing to
tolerate small errors. For example, ApproxHadoop can
reduce runtimes by up to 32x when the user can tolerate
an error of 1\% with 95\% confidence. We conclude that
our framework and system can make approximation easily
accessible to many application domains using the
MapReduce model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Ringenburg:2015:MDQ,
author = "Michael Ringenburg and Adrian Sampson and Isaac
Ackerman and Luis Ceze and Dan Grossman",
title = "Monitoring and Debugging the Quality of Results in
Approximate Programs",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "399--411",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694365",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Energy efficiency is a key concern in the design of
modern computer systems. One promising approach to
energy-efficient computation, approximate computing,
trades off output accuracy for significant gains in
energy efficiency. However, debugging the actual cause
of output quality problems in approximate programs is
challenging. This paper presents dynamic techniques to
debug and monitor the quality of approximate
computations. We propose both offline debugging tools
that instrument code to determine the key sources of
output degradation and online approaches that monitor
the quality of deployed applications. We present two
offline debugging techniques and three online
monitoring mechanisms. The first offline tool
identifies correlations between output quality and the
execution of individual approximate operations. The
second tracks approximate operations that flow into a
particular value. Our online monitoring mechanisms are
complementary approaches designed for detecting quality
problems in deployed applications, while still
maintaining the energy savings from approximation. We
present implementations of our techniques and describe
their usage with seven applications. Our online
monitors control output quality while still maintaining
significant energy efficiency gains, and our offline
tools provide new insights into the effects of
approximation on output quality.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Banavar:2015:WEC,
author = "Guruduth Banavar",
title = "{Watson} and the Era of Cognitive Computing",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "413--413",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694376",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the last decade, the availability of massive
amounts of new data, and the development of new machine
learning technologies, have augmented reasoning systems
to give rise to a new class of computing systems. These
``Cognitive Systems'' learn from data, reason from
models, and interact naturally with us, to perform
complex tasks better than either humans or machines can
do by themselves. In essence, cognitive systems help us
perform like the best by penetrating the complexity of
big data and leverage the power of models. One of the
first cognitive systems, called Watson, demonstrated
through a Jeopardy! exhibition match, that it was
capable of answering complex factoid questions as
effectively as the world's champions. Follow-on
cognitive systems perform other tasks, such as
discovery, reasoning, and multi-modal understanding in
a variety of domains, such as healthcare, insurance,
and education. We believe such cognitive systems will
transform every industry and our everyday life for the
better. In this talk, I will give an overview of the
applications, the underlying capabilities, and some of
the key challenges, of cognitive systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Stewart:2015:ZDW,
author = "Gordon Stewart and Mahanth Gowda and Geoffrey Mainland
and Bozidar Radunovic and Dimitrios Vytiniotis and
Cristina Luengo Agullo",
title = "{Ziria}: a {DSL} for Wireless Systems Programming",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "415--428",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694368",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software-defined radio (SDR) brings the flexibility of
software to wireless protocol design, promising an
ideal platform for innovation and rapid protocol
deployment. However, implementing modern wireless
protocols on existing SDR platforms often requires
careful hand-tuning of low-level code, which can
undermine the advantages of software. Ziria is a new
domain-specific language (DSL) that offers programming
abstractions suitable for wireless physical (PHY) layer
tasks while emphasizing the pipeline reconfiguration
aspects of PHY programming. The Ziria compiler
implements a rich set of specialized optimizations,
such as lookup table generation and pipeline fusion. We
also offer a novel --- due to pipeline reconfiguration
--- algorithm to optimize the data widths of
computations in Ziria pipelines. We demonstrate the
programming flexibility of Ziria and the performance of
the generated code through a detailed evaluation of a
line-rate Ziria WiFi 802.11a/g implementation that is
on par and in many cases outperforms a hand-tuned
state-of-the-art C++ implementation on commodity
CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Mullapudi:2015:PAO,
author = "Ravi Teja Mullapudi and Vinay Vasista and Uday
Bondhugula",
title = "{PolyMage}: Automatic Optimization for Image
Processing Pipelines",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "429--443",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694364",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents the design and implementation of
PolyMage, a domain-specific language and compiler for
image processing pipelines. An image processing
pipeline can be viewed as a graph of interconnected
stages which process images successively. Each stage
typically performs one of point-wise, stencil,
reduction or data-dependent operations on image pixels.
Individual stages in a pipeline typically exhibit
abundant data parallelism that can be exploited with
relative ease. However, the stages also require high
memory bandwidth preventing effective utilization of
parallelism available on modern architectures. For
applications that demand high performance, the
traditional options are to use optimized libraries like
OpenCV or to optimize manually. While using libraries
precludes optimization across library routines, manual
optimization accounting for both parallelism and
locality is very tedious. The focus of our system,
PolyMage, is on automatically generating
high-performance implementations of image processing
pipelines expressed in a high-level declarative
language. Our optimization approach primarily relies on
the transformation and code generation capabilities of
the polyhedral compiler framework. To the best of our
knowledge, this is the first model-driven compiler for
image processing pipelines that performs complex
fusion, tiling, and storage optimization automatically.
Experimental results on a modern multicore system show
that the performance achieved by our automatic approach
is up to 1.81x better than that achieved through manual
tuning in Halide, a state-of-the-art language and
compiler for image processing pipelines. For a camera
raw image processing pipeline, our performance is
comparable to that of a hand-tuned implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Heckey:2015:CMC,
author = "Jeff Heckey and Shruti Patil and Ali JavadiAbhari and
Adam Holmes and Daniel Kudrow and Kenneth R. Brown and
Diana Franklin and Frederic T. Chong and Margaret
Martonosi",
title = "Compiler Management of Communication and Parallelism
for Quantum Computation",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "445--456",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694357",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Quantum computing (QC) offers huge promise to
accelerate a range of computationally intensive
benchmarks. Quantum computing is limited, however, by
the challenges of decoherence: i.e., a quantum state
can only be maintained for short windows of time before
it decoheres. While quantum error correction codes can
protect against decoherence, fast execution time is the
best defense against decoherence, so efficient
architectures and effective scheduling algorithms are
necessary. This paper proposes the Multi-SIMD QC
architecture and then proposes and evaluates effective
schedulers to map benchmark descriptions onto
Multi-SIMD architectures. The Multi-SIMD model consists
of a small number of SIMD regions, each of which may
support operations on up to thousands of qubits per
cycle. Efficient Multi-SIMD operation requires
efficient scheduling. This work develops schedulers to
reduce communication requirements of qubits between
operating regions, while also improving parallelism. We
find that communication to global memory is a dominant
cost in QC. We also note that many quantum benchmarks
have long serial operation paths (although each
operation may be data parallel). To exploit this
characteristic, we introduce Longest-Path-First
Scheduling (LPFS) which pins operations to SIMD regions
to keep data in-place and reduce communication to
memory. The use of small, local scratchpad memories
also further reduces communication. Our results show a
3\% to 308\% improvement for LPFS over conventional
scheduling algorithms, and an additional 3\% to 64\%
improvement using scratchpad memories. Our work is the
most comprehensive software-to-quantum toolflow
published to date, with efficient and practical
scheduling techniques that reduce communication and
increase parallelism for full-scale quantum code
executing up to a trillion quantum gate operations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Hassaan:2015:KDG,
author = "Muhammad Amber Hassaan and Donald D. Nguyen and Keshav
K. Pingali",
title = "Kinetic Dependence Graphs",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "457--471",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694363",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Task graphs or dependence graphs are used in runtime
systems to schedule tasks for parallel execution. In
problem domains such as dense linear algebra and signal
processing, dependence graphs can be generated from a
program by static analysis. However, in emerging
problem domains such as graph analytics, the set of
tasks and dependences between tasks in a program are
complex functions of runtime values and cannot be
determined statically. In this paper, we introduce a
novel approach for exploiting parallelism in such
programs. This approach is based on a data structure
called the kinetic dependence graph (KDG), which
consists of a dependence graph together with update
rules that incrementally update the graph to reflect
changes in the dependence structure whenever a task is
completed. We have implemented a simple programming
model that allows programmers to write these
applications at a high level of abstraction, and a
runtime within the Galois system [15] that builds the
KDG automatically and executes the program in parallel.
On a suite of programs that are difficult to
parallelize otherwise, we have obtained speedups of up
to 33 on 40 cores, out-performing third-party
implementations in many cases.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Sidiroglou-Douskos:2015:TAI,
author = "Stelios Sidiroglou-Douskos and Eric Lahtinen and
Nathan Rittenhouse and Paolo Piselli and Fan Long and
Deokhwan Kim and Martin Rinard",
title = "Targeted Automatic Integer Overflow Discovery Using
Goal-Directed Conditional Branch Enforcement",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "473--486",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694389",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present a new technique and system, DIODE, for
automatically generating inputs that trigger
overflows at memory allocation sites. DIODE is designed
to identify relevant sanity checks that inputs must
satisfy to trigger overflows at target memory
allocation sites, then generate inputs that satisfy
these sanity checks to successfully trigger the
overflow. DIODE works with off-the-shelf, production
x86 binaries. Our results show that, for our benchmark
set of applications, and for every target memory
allocation site exercised by our seed inputs (which the
applications process correctly with no overflows),
either (1) DIODE is able to generate an input that
triggers an overflow at that site or (2) there is no
input that would trigger an overflow for the observed
target expression at that site.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Dhawan:2015:ASS,
author = "Udit Dhawan and Catalin Hritcu and Raphael Rubin and
Nikos Vasilakis and Silviu Chiricescu and Jonathan M.
Smith and Thomas F. {Knight, Jr.} and Benjamin C.
Pierce and Andre DeHon",
title = "Architectural Support for Software-Defined Metadata
Processing",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "487--502",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694383",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Optimized hardware for propagating and checking
software-programmable metadata tags can achieve low
runtime overhead. We generalize prior work on hardware
tagging by considering a generic architecture that
supports software-defined policies over metadata of
arbitrary size and complexity; we introduce several
novel microarchitectural optimizations that keep the
overhead of this rich processing low. Our model thus
achieves the efficiency of previous hardware-based
approaches with the flexibility of the software-based
ones. We demonstrate this by using it to enforce four
diverse safety and security policies---spatial and
temporal memory safety, taint tracking, control-flow
integrity, and code and data separation---plus a
composite policy that enforces all of them
simultaneously. Experiments on SPEC CPU2006 benchmarks
with a PUMP-enhanced RISC processor show modest impact
on runtime (typically under 10\%) and power ceiling
(less than 10\%), in return for some increase in energy
usage (typically under 60\%) and area for on-chip
memory structures (110\%).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Zhang:2015:HDL,
author = "Danfeng Zhang and Yao Wang and G. Edward Suh and
Andrew C. Myers",
title = "A Hardware Design Language for Timing-Sensitive
Information-Flow Security",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "503--516",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694372",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Information security can be compromised by leakage via
low-level hardware features. One recently prominent
example is cache probing attacks, which rely on timing
channels created by caches. We introduce a hardware
design language, SecVerilog, which makes it possible to
statically analyze information flow at the hardware
level. With SecVerilog, systems can be built with
verifiable control of timing channels and other
information channels. SecVerilog is Verilog, extended
with expressive type annotations that enable precise
reasoning about information flow. It also comes with
rigorous formal assurance: we prove that SecVerilog
enforces timing-sensitive noninterference and thus
ensures secure information flow. By building a secure
MIPS processor and its caches, we demonstrate that
SecVerilog makes it possible to build complex hardware
designs with verified security, yet with low overhead
in time, space, and HW designer effort.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Hicks:2015:SLR,
author = "Matthew Hicks and Cynthia Sturton and Samuel T. King
and Jonathan M. Smith",
title = "{SPECS}: a Lightweight Runtime Mechanism for
Protecting Software from Security-Critical Processor
Bugs",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "517--529",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694366",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Processor implementation errata remain a problem, and
worse, a subset of these bugs are security-critical. We
classified 7 years of errata from recent commercial
processors to understand the magnitude and severity of
this problem, and found that of 301 errata analyzed, 28
are security-critical. We propose the SECURITY-CRITICAL
PROCESSOR ERRATA CATCHING SYSTEM (SPECS) as a
low-overhead solution to this problem. SPECS employs a
dynamic verification strategy that is made lightweight
by limiting protection to only security-critical
processor state. As a proof-of-concept, we implement a
hardware prototype of SPECS in an open source
processor. Using this prototype, we evaluate SPECS
against a set of 14 bugs inspired by the types of
security-critical errata we discovered in the
classification phase. The evaluation shows that SPECS
is 86\% effective as a defense when deployed using only
ISA-level state; incurs less than 5\% area and power
overhead; and has no software run-time overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Duan:2015:AMF,
author = "Yuelu Duan and Nima Honarmand and Josep Torrellas",
title = "Asymmetric Memory Fences: Optimizing Both Performance
and Implementability",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "531--543",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694388",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "There have been several recent efforts to improve the
performance of fences. The most aggressive designs
allow post-fence accesses to retire and complete before
the fence completes. Unfortunately, such designs
present implementation difficulties due to their
reliance on global state and structures. This paper's
goal is to optimize both the performance and the
implementability of fences. We start-off with a design
like the most aggressive ones but without the global
state. We call it Weak Fence or wF. Since the
concurrent execution of multiple wFs can deadlock, we
combine wFs with a conventional fence (i.e., Strong
Fence or sF) for the less performance-critical
thread(s). We call the result an Asymmetric fence
group. We also propose a taxonomy of Asymmetric fence
groups under TSO. Compared to past aggressive fences,
Asymmetric fence groups both are substantially easier
to implement and have higher average performance. The
two main designs presented (WS+ and W+) speed-up
workloads under TSO by an average of 13\% and 21\%,
respectively, over conventional fences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Sung:2015:DES,
author = "Hyojin Sung and Sarita V. Adve",
title = "{DeNovoSync}: Efficient Support for Arbitrary
Synchronization without Writer-Initiated
Invalidations",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "545--559",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694356",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Current shared-memory hardware is complex and
inefficient. Prior work on the DeNovo coherence
protocol showed that disciplined shared-memory
programming models can enable more complexity-,
performance-, and energy-efficient hardware than the
state-of-the-art MESI protocol. DeNovo, however,
severely restricted the synchronization constructs an
application can support. This paper proposes
DeNovoSync, a technique to support arbitrary
synchronization in DeNovo. The key challenge is that
DeNovo exploits race-freedom to use reader-initiated
local self-invalidations (instead of conventional
writer-initiated remote cache invalidations) to ensure
coherence. Synchronization accesses are inherently racy
and not directly amenable to self-invalidations.
DeNovoSync addresses this challenge using a novel
combination of registration of all synchronization
reads with a judicious hardware backoff to limit
unnecessary registrations. For a wide variety of
synchronization constructs and applications, compared
to MESI, DeNovoSync shows comparable or up to 22\%
lower execution time and up to 58\% lower network
traffic, enabling DeNovo's advantages for a much
broader class of software than previously possible.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Sengupta:2015:HSD,
author = "Aritra Sengupta and Swarnendu Biswas and Minjia Zhang
and Michael D. Bond and Milind Kulkarni",
title = "Hybrid Static-Dynamic Analysis for Statically Bounded
Region Serializability",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "561--575",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694379",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data races are common. They are difficult to detect,
avoid, or eliminate, and programmers sometimes
introduce them intentionally. However, shared-memory
programs with data races have unexpected, erroneous
behaviors. Intentional and unintentional data races
lead to atomicity and sequential consistency (SC)
violations, and they make it more difficult to
understand, test, and verify software. Existing
approaches for providing stronger guarantees for racy
executions add high run-time overhead and/or rely on
custom hardware. This paper shows how to provide
stronger semantics for racy programs while providing
relatively good performance on commodity systems. A
novel hybrid static--dynamic analysis called
\emph{EnfoRSer} provides end-to-end support for a
memory model called \emph{statically bounded region
serializability} (SBRS) that is not only stronger than
weak memory models but is strictly stronger than SC.
EnfoRSer uses static compiler analysis to transform
regions, and dynamic analysis to detect and resolve
conflicts at run time. By demonstrating commodity
support for a reasonably strong memory model with
reasonable overheads, we show its potential as an
always-on execution model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Alglave:2015:GCW,
author = "Jade Alglave and Mark Batty and Alastair F. Donaldson
and Ganesh Gopalakrishnan and Jeroen Ketema and Daniel
Poetzl and Tyler Sorensen and John Wickerson",
title = "{GPU} Concurrency: Weak Behaviours and Programming
Assumptions",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "577--591",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694391",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Concurrency is pervasive and perplexing, particularly
on graphics processing units (GPUs). Current
specifications of languages and hardware are
inconclusive; thus programmers often rely on folklore
assumptions when writing software. To remedy this state
of affairs, we conducted a large empirical study of the
concurrent behaviour of deployed GPUs. Armed with
litmus tests (i.e. short concurrent programs), we
questioned the assumptions in programming guides and
vendor documentation about the guarantees provided by
hardware. We developed a tool to generate thousands of
litmus tests and run them under stressful workloads. We
observed a litany of previously elusive weak
behaviours, and exposed folklore beliefs about GPU
programming---often supported by official
tutorials---as false. As a way forward, we propose a
model of Nvidia GPU hardware, which correctly models
every behaviour witnessed in our experiments. The model
is a variant of SPARC Relaxed Memory Order (RMO),
structured following the GPU concurrency hierarchy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Park:2015:CCP,
author = "Jason Jong Kyu Park and Yongjun Park and Scott
Mahlke",
title = "{Chimera}: Collaborative Preemption for Multitasking
on a Shared {GPU}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "593--606",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694346",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The demand for multitasking on graphics processing
units (GPUs) is constantly increasing as they have
become one of the default components on modern computer
systems along with traditional processors (CPUs).
Preemptive multitasking on CPUs has been primarily
supported through context switching. However, the same
preemption strategy incurs substantial overhead due to
the large context in GPUs. The overhead comes in two
dimensions: a preempting kernel suffers from a long
preemption latency, and the system throughput is wasted
during the switch. Without precise control over the
large preemption overhead, multitasking on GPUs has
little use for applications with strict latency
requirements. In this paper, we propose Chimera, a
collaborative preemption approach that can precisely
control the overhead for multitasking on GPUs. Chimera
first introduces streaming multiprocessor (SM)
flushing, which can instantly preempt an SM by
detecting and exploiting idempotent execution. Chimera
utilizes flushing collaboratively with two previously
proposed preemption techniques for GPUs, namely context
switching and draining to minimize throughput overhead
while achieving a required preemption latency.
Evaluations show that Chimera violates the deadline for
only 0.2\% of preemption requests when a 15 $ \mu $s
preemption latency constraint is used. For
multi-programmed workloads, Chimera can improve the
average normalized turnaround time by 5.5x, and system
throughput by 12.2\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Agarwal:2015:PPS,
author = "Neha Agarwal and David Nellans and Mark Stephenson and
Mike O'Connor and Stephen W. Keckler",
title = "Page Placement Strategies for {GPUs} within
Heterogeneous Memory Systems",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "607--618",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694381",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Systems from smartphones to supercomputers are
increasingly heterogeneous, being composed of both CPUs
and GPUs. To maximize cost and energy efficiency, these
systems will increasingly use globally-addressable
heterogeneous memory systems, making choices about
memory page placement critical to performance. In this
work we show that current page placement policies are
not sufficient to maximize GPU performance in these
heterogeneous memory systems. We propose two new page
placement policies that improve GPU performance: one
application agnostic and one using application profile
information. Our application agnostic policy,
bandwidth-aware (BW-AWARE) placement, maximizes GPU
throughput by balancing page placement across the
memories based on the aggregate memory bandwidth
available in a system. Our simulation-based results
show that BW-AWARE placement outperforms the existing
Linux INTERLEAVE and LOCAL policies by 35\% and 18\% on
average for GPU compute workloads. We build upon
BW-AWARE placement by developing a compiler-based
profiling mechanism that provides programmers with
information about GPU application data structure access
patterns. Combining this information with simple
program-annotated hints about memory placement, our
hint-based page placement approach performs within 90\%
of oracular page placement on average, largely
mitigating the need for costly dynamic page tracking
and migration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Zhao:2015:FPS,
author = "Zhijia Zhao and Xipeng Shen",
title = "On-the-Fly Principled Speculation for {FSM}
Parallelization",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "619--630",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694369",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Finite State Machine (FSM) is the backbone of an
important class of applications in many domains. Its
parallelization has been extremely difficult due to
inherent strong dependences in the computation.
Recently, principled speculation shows good promise to
solve the problem. However, the reliance on offline
training makes the approach inconvenient to adopt and
hard to apply to many practical FSM applications, which
often deal with a large variety of inputs different
from training inputs. This work presents an assembly of
techniques that completely remove the needs for offline
training. The techniques include a set of theoretical
results on inherent properties of FSMs, and two newly
designed dynamic optimizations for efficient FSM
characterization. The new techniques, for the first
time, make principled speculation applicable on the fly,
and enable swift, automatic configuration of
speculative parallelizations to best suit a given FSM
and its current input. They eliminate the fundamental
barrier for practical adoption of principled speculation
for FSM parallelization. Experiments show that the new
techniques give significantly higher speedups for some
difficult FSM applications in the presence of input
changes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{David:2015:ACS,
author = "Tudor David and Rachid Guerraoui and Vasileios
Trigonakis",
title = "Asynchronized Concurrency: The Secret to Scaling
Concurrent Search Data Structures",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "631--644",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694359",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We introduce ``asynchronized concurrency (ASCY),'' a
paradigm consisting of four complementary programming
patterns. ASCY calls for the design of concurrent
search data structures (CSDSs) to resemble that of
their sequential counterparts. We argue that ASCY leads
to implementations which are portably scalable: they
scale across different types of hardware platforms,
including single and multi-socket ones, for various
classes of workloads, such as read-only and read-write,
and according to different performance metrics,
including throughput, latency, and energy. We
substantiate our thesis through the most exhaustive
evaluation of CSDSs to date, involving 6 platforms, 22
state-of-the-art CSDS algorithms, 10 re-engineered
state-of-the-art CSDS algorithms following the ASCY
patterns, and 2 new CSDS algorithms designed with ASCY
in mind. We observe up to 30\% improvements in
throughput in the re-engineered algorithms, while our
new algorithms out-perform the state-of-the-art
alternatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Bhatotia:2015:ITL,
author = "Pramod Bhatotia and Pedro Fonseca and Umut A. Acar and
Bj{\"o}rn B. Brandenburg and Rodrigo Rodrigues",
title = "{iThreads}: a Threading Library for Parallel
Incremental Computation",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "645--659",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694371",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Incremental computation strives for efficient
successive runs of applications by re-executing only
those parts of the computation that are affected by a
given input change instead of recomputing everything
from scratch. To realize these benefits automatically,
we describe iThreads, a threading library for parallel
incremental computation. iThreads supports unmodified
shared-memory multithreaded programs: it can be used as
a replacement for pthreads by a simple exchange of
dynamically linked libraries, without even recompiling
the application code. To enable such an interface, we
designed algorithms and an implementation to operate at
the compiled binary code level by leveraging
MMU-assisted memory access tracking and process-based
thread isolation. Our evaluation on a multicore
platform using applications from the PARSEC and Phoenix
benchmarks and two case-studies shows significant
performance gains.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Gidra:2015:NGC,
author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and
Marc Shapiro and Nhan Nguyen",
title = "{NumaGiC}: a Garbage Collector for Big Data on Big
{NUMA} Machines",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "661--673",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694361",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "On contemporary cache-coherent Non-Uniform Memory
Access (ccNUMA) architectures, applications with a
large memory footprint suffer from the cost of the
garbage collector (GC), because, as the GC scans the
reference graph, it makes many remote memory accesses,
saturating the interconnect between memory nodes. We
address this problem with NumaGiC, a GC with a
mostly-distributed design. In order to maximise memory
access locality during collection, a GC thread avoids
accessing a different memory node, instead notifying a
remote GC thread with a message; nonetheless, NumaGiC
avoids the drawbacks of a pure distributed design,
which tends to decrease parallelism. We compare NumaGiC
with Parallel Scavenge and NAPS on two different ccNUMA
architectures running on the Hotspot Java Virtual
Machine of OpenJDK 7. On Spark and Neo4j, two
industry-strength analytics applications, with heap
sizes ranging from 160GB to 350GB, and on SPECjbb2013
and SPECjbb2005, NumaGiC improves overall performance by
up to 45\% over NAPS (up to 94\% over Parallel
Scavenge), and increases the performance of the
collector itself by up to 3.6x over NAPS (up to 5.4x
over Parallel Scavenge).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Nguyen:2015:FCR,
author = "Khanh Nguyen and Kai Wang and Yingyi Bu and Lu Fang
and Jianfei Hu and Guoqing Xu",
title = "{FACADE}: a Compiler and Runtime for (Almost)
Object-Bounded Big Data Applications",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "675--690",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694345",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The past decade has witnessed the increasing demands
on data-driven business intelligence that led to the
proliferation of data-intensive applications. A managed
object-oriented programming language such as Java is
often the developer's choice for implementing such
applications, due to its quick development cycle and
rich community resource. While the use of such
languages makes programming easier, their automated
memory management comes at a cost. When the managed
runtime meets Big Data, this cost is significantly
magnified and becomes a scalability-prohibiting
bottleneck. This paper presents a novel compiler
framework, called Facade, that can generate
highly-efficient data manipulation code by
automatically transforming the data path of an existing
Big Data application. The key treatment is that in the
generated code, the number of runtime heap objects
created for data types in each thread is (almost)
statically bounded, leading to significantly reduced
memory management cost and improved scalability. We
have implemented Facade and used it to transform 7
common applications on 3 real-world, already
well-optimized Big Data frameworks: GraphChi, Hyracks,
and GPS. Our experimental results are very positive:
the generated programs have (1) achieved a 3\%--48\%
execution time reduction and an up to 88X GC reduction;
(2) consumed up to 50\% less memory, and (3) scaled to
much larger datasets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Agrawal:2015:ASD,
author = "Varun Agrawal and Abhiroop Dabral and Tapti Palit and
Yongming Shen and Michael Ferdman",
title = "Architectural Support for Dynamic Linking",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "691--702",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694392",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "All software in use today relies on libraries,
including standard libraries (e.g., C, C++) and
application-specific libraries (e.g., libxml, libpng).
Most libraries are loaded in memory and dynamically
linked when programs are launched, resolving symbol
addresses across the applications and libraries.
Dynamic linking has many benefits: It allows code to be
reused between applications, conserves memory (because
only one copy of a library is kept in memory for all
the applications that share it), and allows libraries
to be patched and updated without modifying programs,
among numerous other benefits. However, these benefits
come at the cost of performance. For every call made to
a function in a dynamically linked library, a
trampoline is used to read the function address from a
lookup table and branch to the function, incurring
memory load and branch operations. Static linking
avoids this performance penalty, but loses all the
benefits of dynamic linking. Given its myriad benefits,
dynamic linking is the predominant choice today,
despite the performance cost. In this work, we propose
a speculative hardware mechanism to optimize dynamic
linking by avoiding executing the trampolines for
library function calls, providing the benefits of
dynamic linking with the performance of static linking.
Speculatively skipping the memory load and branch
operations of the library call trampolines improves
performance by reducing the number of executed
instructions and gains additional performance by
reducing pressure on the instruction and data caches,
TLBs, and branch predictors. Because the indirect
targets of library call trampolines do not change
during program execution, our speculative mechanism
never misspeculates in practice. We evaluate our
technique on real hardware with production software and
observe up to 4\% speedup using only 1.5KB of on-chip
storage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Chien:2015:CSH,
author = "Andrew A. Chien and Tung Thanh-Hoang and Dilip
Vasudevan and Yuanwei Fang and Amirali Shambayati",
title = "{$ 10 \times 10 $}: a Case Study in Highly-Programmable
and Energy-Efficient Heterogeneous Federated
Architecture",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "3",
pages = "2--9",
month = may,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2856113.2856115",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Dec 21 18:10:56 MST 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Customized architecture is widely recognized as an
important approach for improved performance and energy
efficiency. To balance generality and customization
benefit, researchers have proposed to federate
heterogeneous micro-engines. Using the $ 10 \times 10 $
architecture and an integrated image and vision
benchmark as a case study, we explore the performance
and energy benefits achievable. Results for current
32nm technology and DDR3 memory show $ 10 \times 10 $
architecture benefits of 140$ \times $ performance and
72$ \times $ energy overall. Adding 3D-stacked DRAM
increases benefits to 171$ \times $ (performance) and
100$ \times $ (energy). Finally, considering future 7nm
transistor process, benefits as large as 597$ \times $
(performance) and 137$ \times $ energy are observed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2015:INa,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "3",
pages = "10--16",
month = may,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2856113.2856117",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Dec 21 18:10:56 MST 2015",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Herbordt:2015:LLG,
author = "Martin Herbordt and Miriam Leeser",
title = "Off-Loading {LET} Generation to {PEACH2}: a Switching
Hub for High Performance {GPU} Clusters",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "3--8",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927966",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A hardware local essential tree (LET) generator used
in an N-body simulation is implemented on the FPGA of
PEACH2 (PCI Express Adaptive Communication Hub ver2), a
low latency switching hub for high performance GPU
clusters. By using the pipelined on-the-fly execution
with a multipole acceptance criterion judging module
and a data updating module, the generation performance
is 2.2 times faster than that with the CPU. When data
communication is considered, the performance was 7.2
times as the case with the CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Okina:2015:PPP,
author = "Koji Okina and Rie Soejima and Kota Fukumoto and
Yuichiro Shibata and Kiyoshi Oguri",
title = "Power Performance Profiling of {$3$-D} Stencil
Computation on an {FPGA} Accelerator for Efficient
Pipeline Optimization",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "9--14",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927967",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper discusses power-performance optimization
for 3-D stencil computing on a stream-oriented FPGA
accelerator with high-level synthesis. Taking a heat
conduction simulation and an FDTD electromagnetic field
simulation as benchmark applications, power-performance
profiling results are presented focusing on the effect
of high-level pipeline parameters. As a result, it is
shown that the optimal power efficiency can be achieved
basically by optimizing the execution performance. The
relationship between power efficiency and the clock
frequency is also discussed.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Lashgar:2015:CSR,
author = "Ahmad Lashgar and Ebad Salehi and Amirali Baniasadi",
title = "A Case Study in Reverse Engineering {GPGPUs}:
Outstanding Memory Handling Resources",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "15--21",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927968",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "During recent years, GPU micro-architectures have
changed dramatically, evolving into powerful many-core
deep-multithreaded platforms for parallel workloads.
While important micro-architectural modifications
continue to appear in every new generation of these
processors, unfortunately, little is known about the
details of these innovative designs. One of the key
questions in understanding GPUs is how they deal with
outstanding memory misses. Our goal in this study is to
find answers to this question. To this end, we develop
a set of micro-benchmarks in CUDA to understand the
outstanding memory requests handling resources.
Particularly, we study two NVIDIA GPGPUs (Fermi and
Kepler) and estimate their capability in handling
outstanding memory requests. We show that Kepler can
issue nearly 32X higher number of outstanding memory
requests, compared to Fermi. We explain this
enhancement by Kepler's architectural modifications in
outstanding memory request handling resources.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Hayashi:2015:LRO,
author = "Ami Hayashi and Yuta Tokusashi and Hiroki Matsutani",
title = "A Line Rate Outlier Filtering {FPGA NIC} using {10GbE}
Interface",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "22--27",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927969",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As data sets grow rapidly in size and the number, an
outlier detection that filters unnecessary normal
information becomes important. In this paper, we
propose to move the unsupervised outlier detection from
an application layer to a network interface card (NIC).
Only anomalous items or events are received for a
network protocol stack and the other packets are
discarded at the NIC. The demands for storage and
computation costs at a host are thus dramatically
reduced. However, because normal items are discarded at
the NIC and the application layer can no longer know
what is normal, in our approach, the application at the
host periodically peeks at the NIC buffer. We select an
outlier detection based on the Mahalanobis distance as
one of the simplest algorithms. Our approach is
implemented on an FPGA-based NIC that has 10GbE
interfaces. The sampling frequency of the NIC buffer
vs. outlier detection precision is analyzed. Real
experiments using the FPGA NIC demonstrate a 14,000,000
samples-per-second throughput in performance, which is
close to the 10GbE line rate.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Jain:2015:ADA,
author = "Abhishek Kumar Jain and Xiangwei Li and Suhaib A.
Fahmy and Douglas L. Maskell",
title = "Adapting the {DySER} Architecture with {DSP} Blocks as
an Overlay for the {Xilinx Zynq}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "28--33",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927970",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Coarse-grained overlay architectures have been shown
to be effective when paired with general purpose
processors, offering software-like programmability,
fast compilation, and improved design productivity.
These architectures enable general purpose hardware
accelerators, allowing hardware design at a higher
level of abstraction, but at the cost of area and
performance overheads. This paper examines the DySER
overlay architecture as a hardware accelerator paired
with a general purpose processor in a hybrid FPGA such
as the Xilinx Zynq. We evaluate the DySER architecture
mapped on the Xilinx Zynq and show that it suffers from
a significant area and performance overhead. We then
propose an improved functional unit architecture using
the flexibility of the DSP48E1 primitive which results
in a 2.5 times frequency improvement and 25\% area
reduction compared to the original functional unit
architecture. We demonstrate that this improvement
results in the routing architecture becoming the
bottleneck in performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{delaChevallerie:2015:FLH,
author = "David de la Chevallerie and Jens Korinth and Andreas
Koch",
title = "{ffLink}: a Lightweight High-Performance Open-Source
{PCI Express Gen3} Interface for Reconfigurable
Accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "34--39",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927971",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We describe the architecture and implementation of
ffLink, a high-performance PCIe Gen3 interface for
attaching reconfigurable accelerators on Xilinx Virtex
7 FPGA devices to Linux-based hosts. ffLink encompasses
both hardware as well as flexible operating system
components that allow a tailoring of the infrastructure
to the specific data transfer needs of the application.
When configured to use multiple DMA engines to hide
transfer latencies, ffLink achieves a throughput of up
to 7 GB/s, which is 95\% of the maximum throughput of
an eight-lane PCIe interface, while requiring just 11\%
of device area on a mid-size FPGA.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Hmid:2015:TAR,
author = "Soukaina N. Hmid and Jose G. F. Coutinho and Wayne
Luk",
title = "A Transfer-Aware Runtime System for Heterogeneous
Asynchronous Parallel Execution",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "40--45",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927972",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a novel resource management
approach for efficiently managing the computation and
the data movements between the host and its
accelerators in a heterogeneous platform. Our approach
is based on OmpSs, with support for multi-core CPUs,
GPGPUs and Maxeler Data Flow Engines based on FPGA
technology; it exploits data locality, data transfer
costs and data dependencies. The proposed approach is
supported by an offline learning process coupled with
online monitoring, allowing performance to be estimated
while learning from past observations during execution.
Its performance is compared against the current OmpSs
scheduler using five benchmarks: matrix multiplication,
bitonic sort, N-body simulation, Cholesky decomposition
and AdPredictor. The results show the proposed approach
can achieve up to 4.25 times speed-up for Cholesky
decomposition. Moreover, an evaluation with AdPredictor
indicates that the FPGA version is up to 46 times
faster than the CPU version for large task sizes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Al-Wattar:2015:EMA,
author = "Ahmed Al-Wattar and Shawki Areibi and Gary Grewal",
title = "Efficient Mapping and Allocation of Execution Units to
Task Graphs using an Evolutionary Framework",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "46--51",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927973",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Partial dynamic reconfiguration of FPGAs gives
designers the capability to change certain parts of the
hardware while other parts remain active and in use.
This provides several benefits including reducing
device count and power consumption. However, this also
introduces new challenges that need to be addressed by
designers. This paper introduces a framework for
efficient mapping of execution units to task graphs in
a runtime reconfigurable system. The framework utilizes
an Island Based Genetic Algorithm flow that optimizes
several objectives including delay and power
consumption. The GA based technique not only optimizes
the above objectives, but also aggregates the Pareto
front of the different islands to further enhance
solution quality. The Island based GA runs each GA in
parallel, and is amenable to both software and hardware
implementation. The proposed Island based GA framework
achieves on average 55.2\% improvement over a single GA
implementation and 80.7\% improvement over a baseline
random allocation and binding approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Momeni:2015:EEO,
author = "Amir Momeni and Hamed Tabkhi and Yash Ukidave and
Gunar Schirner and David Kaeli",
title = "Exploring the Efficiency of the {OpenCL} Pipe Semantic
on an {FPGA}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "52--57",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927974",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper evaluates the potential benefits of
leveraging the OpenCL Pipe semantic to accelerate
FPGA-based applications. Our work focuses on streaming
applications in the embedded vision processing domain.
These applications are well-suited for concurrent
kernel execution support and inter-kernel communication
enabled by using OpenCL pipes. We analyze the impact of
multiple design factors and application optimizations
to improve the performance offered by OpenCL Pipes. The
design tradeoffs considered include: the execution
granularity across kernels, the rate and volume of data
transfers, and the Pipe size. For our case study
application of vision flow, we observe a 2.8X increase in
throughput for tuned pipelined kernels, as compared to
non-pipelined execution. In addition, we propose a
novel mechanism to efficiently capture the behavior for
2-dimensional (2D) vision algorithms to benefit
Pipe-based execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Mitsuishi:2015:BFS,
author = "Takuji Mitsuishi and Jun Suzuki and Yuki Hayashi and
Masaki Kan and Hideharu Amano",
title = "Breadth First Search on Cost-efficient Multi-{GPU}
Systems",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "58--63",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927975",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A parallel Breadth First Search (BFS) algorithm is
proposed for cost-efficient multi-GPU systems without
enough memory amount or communication performance. By
using an improved data structure for the duplication
elimination of local nodes, both required memory amount
and processing time are reduced. By using Unified
Virtual Addressing, time for communication can be
hidden with the computation. The proposed algorithm is
implemented on two cost-efficient multi-GPU systems:
Express multi-GPU system which has a full of
flexibility but the communication latency between GPU
and host is limited, and a high-end gaming machine
whose memory is limited. Both systems achieve good
strong scaling with the proposed methods. On Express
multi-GPU system, the communication overhead was almost
completely hidden, and the aggregate communication
throughput reached 4.77 GB/sec (38.16 Gbps), almost
theoretical maximum.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Mefenza:2015:IBM,
author = "Michael Mefenza and Nicolas Edwards and Christophe
Bobda",
title = "Interface Based Memory Synthesis of Image Processing
Applications in {FPGA}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "64--69",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927976",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Image processing applications are computationally
intensive and data intensive and rely on memory
elements (buffer, window, line buffer, shift register,
and frame buffer) to store data flow dependencies
between computing components in FPGA. Due to the
limited availability of these resources, optimization
of memory allocation and the implementation of
efficient memory architectures are important issues. We
present an interface, the Component Interconnect and
Data Access (CIDA), and its implementation, based on
interface automata formalism. We used that interface
for modeling image processing applications and
generating common memory elements. Based on the
proposed model and information about the FPGA
architecture, we also present an optimization model to
achieve allocation memory requirements to embedded
memories (Block RAM and Distributed RAM). Allocation
results from realistic video systems on Xilinx Zynq
FPGAs verify the correctness of the model and show that
the proposed approach achieves appreciable reduction in
block RAM usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Tong:2015:HTS,
author = "Da Tong and Viktor Prasanna",
title = "High Throughput Sketch Based Online Heavy Hitter
Detection on {FPGA}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "70--75",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927977",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the context of networking, a heavy hitter is an
entity in a data stream whose amount of activity (such
as bandwidth consumption or number of connections) is
higher than a given threshold. Detecting heavy hitters
is a critical task for network management and security
in the Internet and data centers. Data streams in
modern network usually contain millions of entities,
such as traffic flows or IP domains. It is challenging
to detect heavy hitters at a high throughput while
supporting such a large number of entities. In this
work, we propose a high throughput online heavy hitter
detector based on the Count-min sketch algorithm on
FPGA. We propose a high throughput hash computation
architecture, optimize the Count-min sketch for
hardware-based heavy hitter detection and use
forwarding to deal with data hazards. The post
place-and-route results of our architecture on a
state-of-the-art FPGA shows high throughput and
scalability. Our architecture achieves a throughput of
114 Gbps while supporting a typical 1 M concurrent
entities. It sustains 100+ Gbps throughput while
supporting various number of concurrent entities,
stream sizes and accuracy requirements. Our
implementation demonstrates improved performance
compared with other sketch acceleration techniques on
various platforms using similar sketch
configurations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Wang:2015:CAS,
author = "Xinying Wang and Phillip H. Jones and Joseph
Zambreno",
title = "A Configurable Architecture for Sparse {$ L U $}
Decomposition on Matrices with Arbitrary Patterns",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "76--81",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927978",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Sparse LU decomposition has been widely used to solve
sparse linear systems of equations found in many
scientific and engineering applications, such as
circuit simulation, power system modeling and computer
vision. However, it is considered a computationally
expensive factorization tool. While parallel
implementations have been explored to accelerate sparse
LU decomposition, irregular sparsity patterns often
limit their performance gains. Prior FPGA-based
accelerators have been customized to domain-specific
sparsity patterns of pre-ordered symmetric matrices. In
this paper, we present an efficient architecture for
sparse LU decomposition that supports both symmetric
and asymmetric sparse matrices with arbitrary sparsity
patterns. The control structure of our architecture
parallelizes computation and pivoting operations. Also,
on-chip resource utilization is configured based on
properties of the matrices being processed. Our
experimental results show a 1.6 to 14x speedup over an
optimized software implementation for benchmarks
containing a wide range of sparsity patterns.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Sano:2015:SCS,
author = "Kentaro Sano and Fumiya Kono and Naohito Nakasato and
Alexander Vazhenin and Stanislav Sedukhin",
title = "Stream Computation of Shallow Water Equation Solver
for {FPGA}-based {$1$D} Tsunami Simulation",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "82--87",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927979",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "MOST (Method Of Splitting Tsunami) is widely used to
solve shallow water equations (SWEs) for forecasting
tsunami generated by an earthquake. Toward development
of a power-efficient and high-performance computing
system for 2D tsunami simulation, we conduct
feasibility study on stream computation of 1D SWE
solver with FPGA. We analyze an original code and design
a stream algorithm with techniques of kernel fusion,
shift buffering for streamed stencil-data access, and
cascading processing elements for a longer pipeline. We
implement a deep pipeline with at most 744 stages of 4
SPEs on 28 nm Stratix V FPGA, which achieves 82.4
GFlop/s at 200 MHz.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Guo:2015:PGA,
author = "Liucheng Guo and Andreea Ingrid Funie and David B.
Thomas and Haohuan Fu and Wayne Luk",
title = "Parallel Genetic Algorithms on Multiple {FPGAs}",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "88--93",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927980",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Genetic algorithms (GA) have been shown to be
effective in the optimization of many large-scale
real-world problems in a reasonable amount of time.
Parallel GAs not only reduce the overall GA execution
time, but also bring higher quality solutions due to
parallel search in multiple parts of the solution
space. This paper proposes a parallel GA system on
hardware such as Field-Programmable-Gate-Arrays
(FPGAs). Our approach targets multiple FPGAs by
exploring different search areas of the same solution
space with different behaviours. Each FPGA contains an
optimised customisable GA which can be configured using
run-time parameters, removing the need for expensive
recompilation. This paper also explores adjustment of
the migration gap, providing empirical guidance on good
settings to users. Experiments on three problems show
the high performance of our system, with a 30 times
speedup achieved compared to a multi-core CPU-based
implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Thorson:2015:INb,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "94--100",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927982",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '15 conference proceedings.",
}
@Article{Thorson:2015:INc,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "5",
pages = "7--11",
month = dec,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2964792.2964794",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 12 16:17:49 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Asgharimoghaddam:2016:SPE,
author = "Hadi Asgharimoghaddam and Nam Sung Kim",
title = "{SpinWise}: a Practical Energy-Efficient
Synchronization Technique for {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "1",
pages = "1--8",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2971331.2971333",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 12 16:17:49 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Spinning had been the classical way of implementing
synchronization primitives (i.e., barriers, locks and
conditions) in pthread library before the adoption of
fast user space mutex (futex). Since spinning cores do
not perform any useful work, it has been believed that
futex is more energy efficient than spinning. In this
paper, using commercial chip multi-processors (CMPs),
first we provide deep insights on how the commercial
CMP and operating system together reduce power
consumption during spinning- and futex-based
synchronization and analyze the duration of
synchronization cycles for each implementation. Second,
we analyze limitations of existing techniques that
attempt to reduce power consumption of CMPs during
synchronization. Finally, we propose a spinning-based
energy-efficient synchronization technique dubbed
SpinWise. We demonstrate that SpinWise can provide 22\%
higher geometric mean energy efficiency than futex for
a CMP running applications with many frequent and short
synchronization events.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Olson:2016:PDW,
author = "Lena E. Olson and Mark D. Hill",
title = "Probabilistic Directed Writebacks for Exclusive
Caches",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "1",
pages = "9--18",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2971331.2971334",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 12 16:17:49 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Energy is an increasingly important consideration in
memory system design. Caches improve energy efficiency
by decreasing execution time and reducing the number of
main memory accesses, but they suffer from known
inefficiencies: the last-level cache (LLC) tends to
have a high miss ratio while simultaneously storing
many blocks that are never referenced. Because these
blocks are not referenced before eviction, we can write
them directly to memory rather than to the LLC. To do
so, we must predict which blocks will not be
referenced. Previous approaches rely on additional
state at the LLC and/or extra communication. We show
that by predicting working set size per program counter
(PC), we can decide which blocks have low probability
of being referenced. Our approach relies on the insight
that it is possible to make this prediction based
solely on the address stream as seen by the level-one
data cache (L1D), eliminating the need to store or
communicate PC values between levels of the cache
hierarchy. We require no modifications to the LLC. Our
approach uses Flajolet and Martin's probabilistic
counting to keep the state small: two additional bits
per L1D block, with an additional 6KB prediction table.
This approach yields a large reduction in number of LLC
writebacks: 25\% fewer for SPEC on average, 80\% fewer
for graph500, and 67\% fewer for an in-memory hash
table.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Thorson:2016:INa,
author = "Mark Thorson",
title = "{Internet} Nuggets",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "1",
pages = "19--22",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2971331.2971336",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Jul 12 16:17:49 MDT 2016",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Zhou:2016:PUH,
author = "Yuanyuan Zhou",
title = "Programming Uncertain {$<$T$>$hings}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "1--2",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872416",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Innovation flourishes with good abstractions. For
instance, codification of the IEEE Floating Point
standard in 1985 was critical to the subsequent success
of scientific computing. Programming languages
currently lack appropriate abstractions for uncertain
data. Applications already use estimates from sensors,
machine learning, big data, humans, and approximate
algorithms, but most programming languages do not help
developers address correctness, programmability, and
optimization problems due to estimates. To address
these problems, we propose a new programming
abstraction called Uncertain. We encourage the
community to develop and use abstractions for
estimates.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Abadal:2016:WAF,
author = "Sergi Abadal and Albert Cabellos-Aparicio and Eduard
Alarcon and Josep Torrellas",
title = "{WiSync}: an Architecture for Fast Synchronization
through On-Chip Wireless Communication",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "3--17",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872396",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In shared-memory multiprocessing, fine-grain
synchronization is challenging because it requires
frequent communication. As technology scaling delivers
larger manycore chips, such pattern is expected to
remain costly to support. In this paper, we propose to
address this challenge by using on-chip wireless
communication. Each core has a transceiver and an
antenna to communicate with all the other cores. This
environment supports very low latency global
communication. Our architecture, called WiSync, uses a
per-core Broadcast Memory (BM). When a core writes to
its BM, all the other 100+ BMs get updated in less than
10 processor cycles. We also use a second wireless
channel with cheaper transfers to execute barriers
efficiently. WiSync supports multiprogramming, virtual
memory, and context switching. Our evaluation with
simulations of 128-threaded kernels and 64-threaded
applications shows that WiSync speeds-up
synchronization substantially. Compared to using
advanced conventional synchronization, WiSync attains
an average speedup of nearly one order of magnitude for
the kernels, and 1.12 for PARSEC and SPLASH-2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Wang:2016:RTE,
author = "Xiaodong Wang and Jos{\'e} F. Mart{\'\i}nez",
title = "{ReBudget}: Trading Off Efficiency vs. Fairness in
Market-Based Multicore Resource Allocation via Runtime
Budget Reassignment",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "19--32",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872382",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Efficiently allocating shared resources in computer
systems is critical to optimizing execution. Recently,
a number of market-based solutions have been proposed
to attack this problem. Some of them provide provable
theoretical bounds to efficiency and/or fairness losses
under market equilibrium. However, they are limited to
markets with potentially important constraints, such as
enforcing equal budget for all players, or
curve-fitting players' utility into a specific function
type. Moreover, they do not generally provide an
intuitive ``knob'' to control efficiency vs. fairness.
In this paper, we introduce two new metrics, Market
Utility Range (MUR) and Market Budget Range (MBR),
through which we provide for the first time theoretical
bounds on efficiency and fairness of market equilibria
under arbitrary budget assignments. We leverage this
result and propose ReBudget, an iterative budget
re-assignment algorithm that can be used to control
efficiency vs. fairness at run-time. We apply our
algorithm to a multi-resource allocation problem in
multicore chips. Our evaluation using detailed
execution-driven simulations shows that our budget
re-assignment technique is intuitive, effective, and
efficient.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Zhu:2016:DEQ,
author = "Haishan Zhu and Mattan Erez",
title = "{Dirigent}: Enforcing {QoS} for Latency-Critical Tasks
on Shared Multicore Systems",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "33--47",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872394",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Latency-critical applications suffer from both average
performance degradation and reduced completion time
predictability when collocated with batch tasks. Such
variation forces the system to overprovision resources
to ensure Quality of Service (QoS) for latency-critical
tasks, degrading overall system throughput. We explore
the causes of this variation and exploit the
opportunities of mitigating variation directly to
simultaneously improve both QoS and utilization. We
develop, implement, and evaluate Dirigent, a
lightweight performance-management runtime system that
accurately controls the QoS of latency-critical
applications at fine time scales, leveraging existing
architecture mechanisms. We evaluate Dirigent on a real
machine and show that it is significantly more
effective than configurations representative of prior
schemes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Kuperman:2016:PR,
author = "Yossi Kuperman and Eyal Moscovici and Joel Nider and
Razya Ladelsky and Abel Gordon and Dan Tsafrir",
title = "Paravirtual Remote {I/O}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "49--65",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872378",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The traditional ``trap and emulate'' I/O
paravirtualization model conveniently allows for I/O
interposition, yet it inherently incurs costly
guest-host context switches. The newer ``sidecore''
model eliminates this overhead by dedicating host
(side)cores to poll the relevant guest memory regions
and react accordingly without context switching. But
the dedication of sidecores on each host might be
wasteful when I/O activity is low, or it might not
provide enough computational power when I/O activity is
high. We propose to alleviate this problem at rack
scale by consolidating the dedicated sidecores spread
across several hosts onto one server. The hypervisor is
then effectively split into two parts: the local
hypervisor that hosts the VMs, and the remote
hypervisor that processes their paravirtual I/O. We
call this model vRIO---paraVirtual Remote I/O. We find
that by increasing the latency somewhat, it provides
comparable throughput with fewer sidecores and superior
throughput with the same number of sidecores as
compared to the state of the art. vRIO additionally
constitutes a new, cost-effective way to consolidate
I/O devices (on the remote hypervisor) while supporting
efficient programmable I/O interposition.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Kaufmann:2016:HPP,
author = "Antoine Kaufmann and Simon Peter and Naveen Kr. Sharma
and Thomas Anderson and Arvind Krishnamurthy",
title = "High Performance Packet Processing with {FlexNIC}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "67--81",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872367",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The recent surge of network I/O performance has put
enormous pressure on memory and software I/O processing
subsystems. We argue that the primary reason for high
memory and processing overheads is the inefficient use
of these resources by current commodity network
interface cards (NICs). We propose FlexNIC, a flexible
network DMA interface that can be used by operating
systems and applications alike to reduce packet
processing overheads. FlexNIC allows services to
install packet processing rules into the NIC, which
then executes simple operations on packets while
exchanging them with host memory. Thus, our proposal
moves some of the packet processing traditionally done
in software to the NIC, where it can be done flexibly
and at high speed. We quantify the potential benefits
of FlexNIC by emulating the proposed FlexNIC
functionality with existing hardware or in software. We
show that significant gains in application performance
are possible, in terms of both latency and throughput,
for several widely used applications, including a
key-value store, a stream processing system, and an
intrusion detection system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Bornholt:2016:SCF,
author = "James Bornholt and Antoine Kaufmann and Jialin Li and
Arvind Krishnamurthy and Emina Torlak and Xi Wang",
title = "Specifying and Checking File System Crash-Consistency
Models",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "83--98",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872406",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Applications depend on persistent storage to recover
state after system crashes. But the POSIX file system
interfaces do not define the possible outcomes of a
crash. As a result, it is difficult for application
writers to correctly understand the ordering of and
dependencies between file system operations, which can
lead to corrupt application state and, in the worst
case, catastrophic data loss. This paper presents
crash-consistency models, analogous to memory
consistency models, which describe the behavior of a
file system across crashes. Crash-consistency models
include both litmus tests, which demonstrate allowed
and forbidden behaviors, and axiomatic and operational
specifications. We present a formal framework for
developing crash-consistency models, and a toolkit,
called Ferrite, for validating those models against
real file system implementations. We develop a
crash-consistency model for ext4, and use Ferrite to
demonstrate unintuitive crash behaviors of the ext4
implementation. To demonstrate the utility of
crash-consistency models to application writers, we use
our models to prototype proof-of-concept verification
and synthesis tools, as well as new library interfaces
for crash-safe applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Prasad:2016:PMR,
author = "Aravinda Prasad and K. Gopinath",
title = "Prudent Memory Reclamation in Procrastination-Based
Synchronization",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "99--112",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872405",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Procrastination is the fundamental technique used in
synchronization mechanisms such as Read-Copy-Update
(RCU) where writers, in order to synchronize with
readers, defer the freeing of an object until there are
no readers referring to the object. The synchronization
mechanism determines when the deferred object is safe
to reclaim and when it is actually reclaimed. Hence,
such memory reclamations are completely oblivious of
the memory allocator state. This induces poor memory
allocator performance, for instance, when the
reclamations are ill-timed. Furthermore, deferred
objects provide hints about the future that inform
memory regions that are about to be freed. Although
useful, hints are not exploited as deferred objects are
not visible to memory allocators. We introduce
Prudence, a dynamic memory allocator, that is tightly
integrated with the synchronization mechanism to ensure
visibility of deferred objects to the memory allocator.
Such an integration enables Prudence to (i) identify
the safe time to reclaim deferred objects' memory, (ii)
have an inclusive view of the allocated, free and
about-to-be-freed objects, and (iii) exploit
optimizations based on the hints about the future
during important state transitions. Our evaluation in
the Linux kernel shows that Prudence integrated with
RCU performs 3.9X to 28X better in micro-benchmarks
compared to SLUB, a recent memory allocator in the
Linux kernel. It also improves the overall performance
perceptibly (4\%--18\%) for a mix of widely used
synthetic and application benchmarks. Further, it
performs better (up to 98\%) in terms of object hits in
caches, object cache churns, slab churns, peak memory
usage and total fragmentation, when compared with the
SLUB allocator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Mukkara:2016:WID,
author = "Anurag Mukkara and Nathan Beckmann and Daniel
Sanchez",
title = "{Whirlpool}: Improving Dynamic Cache Management with
Static Data Classification",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "113--127",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872363",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Cache hierarchies are increasingly non-uniform and
difficult to manage. Several techniques, such as
scratchpads or reuse hints, use static information
about how programs access data to manage the memory
hierarchy. Static techniques are effective on regular
programs, but because they set fixed policies, they are
vulnerable to changes in program behavior or available
cache space. Instead, most systems rely on dynamic
caching policies that adapt to observed program
behavior. Unfortunately, dynamic policies spend
significant resources trying to learn how programs use
memory, and yet they often perform worse than a static
policy. We present Whirlpool, a novel approach that
combines static information with dynamic policies to
reap the benefits of each. Whirlpool statically
classifies data into pools based on how the program
uses memory. Whirlpool then uses dynamic policies to
tune the cache to each pool. Hence, rather than setting
policies statically, Whirlpool uses static analysis to
guide dynamic policies. We present both an API that
lets programmers specify pools manually and a profiling
tool that discovers pools automatically in unmodified
binaries. We evaluate Whirlpool on a state-of-the-art
NUCA cache. Whirlpool significantly outperforms prior
approaches: on sequential programs, Whirlpool improves
performance by up to 38\% and reduces data movement
energy by up to 53\%; on parallel programs, Whirlpool
improves performance by up to 67\% and reduces data
movement energy by up to 2.6x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Jeon:2016:TTD,
author = "Myeongjae Jeon and Yuxiong He and Hwanju Kim and Sameh
Elnikety and Scott Rixner and Alan L. Cox",
title = "{TPC}: Target-Driven Parallelism Combining Prediction
and Correction to Reduce Tail Latency in Interactive
Services",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "129--141",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872370",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In interactive services such as web search,
recommendations, games and finance, reducing the tail
latency is crucial to provide fast response to every
user. Using web search as a driving example, we
systematically characterize interactive workload to
identify the opportunities and challenges for reducing
tail latency. We find that the workload consists of
mainly short requests that do not benefit from
parallelism, and a few long requests which
significantly impact the tail but exhibit high
parallelism speedup. This motivates estimating request
execution time, using a predictor, to identify long
requests and to parallelize them. Prediction, however,
is not perfect; a long request mispredicted as short is
likely to contribute to the server tail latency,
setting a ceiling on the achievable tail latency. We
propose TPC, an approach that combines prediction
information judiciously with dynamic correction for
inaccurate prediction. Dynamic correction increases
parallelism to accelerate a long request that is
mispredicted as short. TPC carefully selects the
appropriate target latencies based on system load and
parallelism efficiency to reduce tail latency. We
implement TPC and several prior approaches to compare
them experimentally on a single search server and on a
cluster of 40 search servers. The experimental results
show that TPC reduces the 99th- and 99.9th-percentile
latency by up to 40\% compared with the best prior
work. Moreover, we evaluate TPC on a finance server,
demonstrating its effectiveness on reducing tail
latency of interactive services beyond web search.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Brown:2016:HBS,
author = "Fraser Brown and Andres N{\"o}tzli and Dawson Engler",
title = "How to Build Static Checking Systems Using Orders of
Magnitude Less Code",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "143--157",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872364",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern static bug finding tools are complex. They
typically consist of hundreds of thousands of lines of
code, and most of them are wedded to one language (or
even one compiler). This complexity makes the systems
hard to understand, hard to debug, and hard to retarget
to new languages, thereby dramatically limiting their
scope. This paper reduces checking system complexity by
addressing a fundamental assumption, the assumption
that checkers must depend on a full-blown language
specification and compiler front end. Instead, our
program checkers are based on drastically incomplete
language grammars (``micro-grammars'') that describe
only portions of a language relevant to a checker. As a
result, our implementation is tiny---roughly 2500 lines
of code, about two orders of magnitude smaller than a
typical system. We hope that this dramatic increase in
simplicity will allow people to use more checkers on
more systems in more languages. We implement our
approach in $ \mu $chex, a language-agnostic framework
for writing static bug checkers. We use it to build
micro-grammar based checkers for six languages (C, the
C preprocessor, C++, Java, JavaScript, and Dart) and
find over 700 errors in real-world projects.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Zhang:2016:TED,
author = "Tong Zhang and Dongyoon Lee and Changhee Jung",
title = "{TxRace}: Efficient Data Race Detection Using
Commodity Hardware Transactional Memory",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "159--173",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872384",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Detecting data races is important for debugging
shared-memory multithreaded programs, but the high
runtime overhead prevents the wide use of dynamic data
race detectors. This paper presents TxRace, a new
software data race detector that leverages commodity
hardware transactional memory (HTM) to speed up data
race detection. TxRace instruments a multithreaded
program to transform synchronization-free regions into
transactions, and exploits the conflict detection
mechanism of HTM for lightweight data race detection at
runtime. However, the limitations of the current
best-effort commodity HTMs expose several challenges in
using them for data race detection: (1) lack of ability
to pinpoint racy instructions, (2) false positives
caused by cache line granularity of conflict detection,
and (3) transactional aborts for non-conflict reasons
(e.g., capacity or unknown). To overcome these
challenges, TxRace performs lightweight HTM-based data
race detection at first, and occasionally switches to
slow yet precise data race detection only for the small
fraction of execution intervals in which potential
races are reported by HTM. According to the
experimental results, TxRace reduces the average
runtime overhead of dynamic data race detection from
11.68x to 4.65x with only a small number of false
negatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Amani:2016:CVH,
author = "Sidney Amani and Alex Hixon and Zilin Chen and
Christine Rizkallah and Peter Chubb and Liam O'Connor
and Joel Beeren and Yutaka Nagashima and Japheth Lim
and Thomas Sewell and Joseph Tuong and Gabriele Keller
and Toby Murray and Gerwin Klein and Gernot Heiser",
title = "{Cogent}: Verifying High-Assurance File System
Implementations",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "175--188",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872404",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present an approach to writing and formally
verifying high-assurance file-system code in a
restricted language called Cogent, supported by a
certifying compiler that produces C code, high-level
specification of Cogent, and translation correctness
proofs. The language is strongly typed and guarantees
absence of a number of common file system
implementation errors. We show how verification effort
is drastically reduced for proving higher-level
properties of the file system implementation by
reasoning about the generated formal specification
rather than its low-level C code. We use the framework
to write two Linux file systems, and compare their
performance with their native C implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Asmussen:2016:MHO,
author = "Nils Asmussen and Marcus V{\"o}lp and Benedikt
N{\"o}then and Hermann H{\"a}rtig and Gerhard
Fettweis",
title = "{M3}: a Hardware\slash Operating-System Co-Design to
Tame Heterogeneous Manycores",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "189--203",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872371",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the last decade, the number of available cores
increased and heterogeneity grew. In this work, we ask
the question whether the design of the current
operating systems (OSes) is still appropriate if these
trends continue and lead to abundantly available but
heterogeneous cores, or whether it forces a fundamental
rethinking of how systems are designed. We argue that:
1. hiding heterogeneity behind a common hardware
interface unifies, to a large extent, the control and
coordination of cores and accelerators in the OS, 2.
isolating at the network-on-chip rather than with
processor features (like privileged mode, memory
management unit, ...), allows running untrusted code on
arbitrary cores, and 3. providing OS services via
protocols over the network-on-chip, instead of via
system calls, makes them accessible to arbitrary types
of cores as well. In summary, this turns accelerators
into first-class citizens and enables a single and
convenient programming environment for all cores
without the need to trust any application. In this
paper, we introduce network-on-chip-level isolation,
present the design of our microkernel-based OS, M3, and
the common hardware interface, and evaluate the
performance of our prototype in comparison to Linux. A
bit surprising, without using accelerators, M3
outperforms Linux in some application-level benchmarks
by more than a factor of five.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Liaqat:2016:SEE,
author = "Daniyal Liaqat and Silviu Jingoi and Eyal de Lara and
Ashvin Goel and Wilson To and Kevin Lee and Italo {De
Moraes Garcia} and Manuel Saldana",
title = "Sidewinder: an Energy Efficient and Developer Friendly
Heterogeneous Architecture for Continuous Mobile
Sensing",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "205--215",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872398",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Applications that perform continuous sensing on mobile
phones have the potential to revolutionize everyday
life. Examples range from medical and health monitoring
applications, such as pedometers and fall detectors, to
participatory sensing applications, such as noise
pollution, traffic and seismic activity monitoring.
Unfortunately, current mobile devices are a poor match
for continuous sensing applications as they require the
device to remain awake for extended periods of time,
resulting in poor battery life. This paper presents
Sidewinder, a new approach towards offloading sensor
data processing to a low-power processor and waking up
the main processor when events of interest occur. This
approach differs from other heterogeneous architectures
in that developers are presented with a programming
interface that lets them construct application specific
wake-up conditions by linking together and
parameterizing predefined sensor data processing
algorithms. Our experiments indicate performance that
is comparable to approaches that provide fully
programmable offloading, but do so with a much simpler
programming interface that facilitates deployment and
portability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Balkind:2016:OOS,
author = "Jonathan Balkind and Michael McKeown and Yaosheng Fu
and Tri Nguyen and Yanqi Zhou and Alexey Lavrov and
Mohammad Shahrad and Adi Fuchs and Samuel Payne and
Xiaohua Liang and Matthew Matl and David Wentzlaff",
title = "{OpenPiton}: an Open Source Manycore Research
Framework",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "217--232",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872414",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Industry is building larger, more complex, manycore
processors on the back of strong institutional
knowledge, but academic projects face difficulties in
replicating that scale. To alleviate these difficulties
and to develop and share knowledge, the community needs
open architecture frameworks for simulation, synthesis,
and software exploration which support extensibility,
scalability, and configurability, alongside an
established base of verification tools and supported
software. In this paper we present OpenPiton, an open
source framework for building scalable architecture
research prototypes from 1 core to 500 million cores.
OpenPiton is the world's first open source,
general-purpose, multithreaded manycore processor and
framework. OpenPiton leverages the industry hardened
OpenSPARC T1 core with modifications and builds upon it
with a scratch-built, scalable uncore creating a
flexible, modern manycore design. In addition,
OpenPiton provides synthesis and backend scripts for
ASIC and FPGA to enable other researchers to bring
their designs to implementation. OpenPiton provides a
complete verification infrastructure of over 8000
tests, is supported by mature software tools, runs
full-stack multiuser Debian Linux, and is written in
industry standard Verilog. Multiple implementations of
OpenPiton have been created including a taped-out
25-core implementation in IBM's 32nm process and
multiple Xilinx FPGA prototypes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Lustig:2016:CVM,
author = "Daniel Lustig and Geet Sethi and Margaret Martonosi
and Abhishek Bhattacharjee",
title = "{COATCheck}: Verifying Memory Ordering at the
Hardware-OS Interface",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "233--247",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872399",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern computer systems include numerous compute
elements, from CPUs to GPUs to accelerators. Harnessing
their full potential requires well-defined,
properly-implemented memory consistency models (MCMs),
and low-level system functionality such as virtual
memory and address translation (AT). Unfortunately, it
is difficult to specify and implement hardware-OS
interactions correctly; in the past, many hardware and
OS specification mismatches have resulted in
implementation bugs in commercial processors. In an
effort to resolve this verification gap, this paper
makes the following contributions. First, we present
COATCheck, an address translation-aware framework for
specifying and statically verifying memory ordering
enforcement at the microarchitecture and operating
system levels. We develop a domain-specific language
for specifying ordering enforcement, for including
ordering-related OS events and hardware
micro-operations, and for programmatically enumerating
happens-before graphs. Using a fast and automated
static constraint solver, COATCheck can efficiently
analyze interesting and important memory ordering
scenarios for modern, high-performance, out-of-order
processors. Second, we show that previous work on
Virtual Address Memory Consistency (VAMC) does not
capture every translation-related ordering scenario of
interest, and that some such cases even fall outside
the traditional scope of consistency. We therefore
introduce the term transistency model to describe the
superset of consistency which captures all
translation-aware sets of ordering rules.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Markuze:2016:TIP,
author = "Alex Markuze and Adam Morrison and Dan Tsafrir",
title = "True {IOMMU} Protection from {DMA} Attacks: When Copy
is Faster than Zero Copy",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "249--262",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872379",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Malicious I/O devices might compromise the OS using
DMAs. The OS therefore utilizes the IOMMU to map and
unmap every target buffer right before and after its
DMA is processed, thereby restricting DMAs to their
designated locations. This usage model, however, is not
truly secure for two reasons: (1) it provides
protection at page granularity only, whereas DMA
buffers can reside on the same page as other data; and
(2) it delays DMA buffer unmaps due to performance
considerations, creating a vulnerability window in
which devices can access in-use memory. We propose that
OSes utilize the IOMMU differently, in a manner that
eliminates these two flaws. Our new usage model
restricts device access to a set of shadow DMA buffers
that are never unmapped, and it copies DMAed data
to/from these buffers, thus providing sub-page
protection while eliminating the aforementioned
vulnerability window. Our key insight is that the cost
of interacting with, and synchronizing access to the
slow IOMMU hardware---required for zero-copy protection
against devices---make copying preferable to
zero-copying. We implement our model in Linux and
evaluate it with standard networking benchmarks
utilizing a 40\,Gb/s NIC. We demonstrate that despite
being more secure than the safest preexisting usage
model, our approach provides up to 5x higher
throughput. Additionally, whereas it is inherently less
scalable than an IOMMU-less (unprotected) system, our
approach incurs only 0\%--25\% performance degradation
in comparison.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Awad:2016:SSZ,
author = "Amro Awad and Pratyusa Manadhata and Stuart Haber and
Yan Solihin and William Horne",
title = "{Silent Shredder}: Zero-Cost Shredding for Secure
Non-Volatile Main Memory Controllers",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "263--276",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872377",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As non-volatile memory (NVM) technologies are expected
to replace DRAM in the near future, new challenges have
emerged. For example, NVMs have slow and
power-consuming writes, and limited write endurance. In
addition, NVMs have a data remanence vulnerability,
i.e., they retain data for a long time after being
powered off. NVM encryption alleviates the
vulnerability, but exacerbates the limited endurance by
increasing the number of writes to memory. We observe
that, in current systems, a large percentage of main
memory writes result from data shredding in operating
systems, a process of zeroing out physical pages before
mapping them to new processes, in order to protect
previous processes' data. In this paper, we propose
Silent Shredder, which repurposes initialization
vectors used in standard counter mode encryption to
completely eliminate the data shredding writes. Silent
Shredder also speeds up reading shredded cache lines,
and hence reduces power consumption and improves
overall performance. To evaluate our design, we run
three PowerGraph applications and 26 multi-programmed
workloads from the SPEC 2006 suite, on a gem5-based
full system simulator. Silent Shredder eliminates an
average of 48.6\% of the writes in the initialization
and graph construction phases. It speeds up main memory
reads by 3.3 times, and improves the number of
instructions per cycle (IPC) by 6.4\% on average.
Finally, we discuss several use cases, including
virtual machines' data isolation and user-level large
data initialization, where Silent Shredder can be used
effectively at no extra cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Kwon:2016:SPT,
author = "Youngjin Kwon and Alan M. Dunn and Michael Z. Lee and
Owen S. Hofmann and Yuanzhong Xu and Emmett Witchel",
title = "{Sego}: Pervasive Trusted Metadata for Efficiently
Verified Untrusted System Services",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "277--290",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872372",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Sego is a hypervisor-based system that gives strong
privacy and integrity guarantees to trusted
applications, even when the guest operating system is
compromised or hostile. Sego verifies operating system
services, like the file system, instead of replacing
them. By associating trusted metadata with user data
across all system devices, Sego verifies system
services more efficiently than previous systems,
especially services that depend on data contents. We
extensively evaluate Sego's performance on real
workloads and implement a kernel fault injector to
validate Sego's file system-agnostic crash consistency
and recovery protocol.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Tsafrir:2016:SAW,
author = "Dan Tsafrir",
title = "Synopsis of the {ASPLOS '16 Wild and Crazy Ideas
(WACI)} Invited-Speakers Session",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "291--294",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2876512",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The Wild and Crazy Ideas (WACI) session is a
longstanding tradition at ASPLOS, soliciting talks that
consist of forward-looking, visionary, inspiring,
creative, far out or just plain amazing ideas presented
in an exciting way. (Amusing elements in the
presentations are tolerated ;-) but are in fact
optional.) The first WACI session took place in 1998.
Back then, the call for talks included a problem
statement, which contended that ``papers usually do not
get admitted to [such conferences as] ISCA or ASPLOS
unless the systems that they describe are mature enough
to run [some standard benchmark suites, which] has a
chilling effect on the idea generation process ---
encouraging incremental research'' [1]. The 1998 WACI
session turned out to be a great success. Its webpage
states that ``there were 42 submissions [competing
over] only eight time slots, [which resulted in] this
session [having] a lower acceptance rate than the
conference itself'' [2]. But the times they are
a-changin' [3], and the WACI session no longer enjoys
that many submissions (Figure (1)), perhaps because
nowadays there exist many forums for researchers to
describe/discuss their preliminary ideas, including:
the ``hot topics in'' workshops [4--7]; a journal like
CAL, dedicated to early results [8]; main conferences
soliciting short submissions describing ``original or
unconventional ideas at a preliminary stage'' in
addition to regular papers [9]; and the many workshops
co-located with main conferences, like ISCA '15, which
hosted thirteen such workshops [10]. Regardless of the
reason for the declining number of submissions, this
time we've decided to organize the WACI session
differently to ensure its continued high quality.
Instead of soliciting talks via an open call and hoping
for the best, we proactively invited speakers whom we
believe are capable of delivering excellent WACI
presentations. That is, this year's WACI session
consists exclusively of invited speakers. Filling up
the available slots turned out to be fairly easy, as
most of the researchers we invited promptly accepted
our invitation. The duration of each talk was set to be
eight minutes (exactly as in the first WACI session
from 1998) plus two minutes for questions. The talks
are outlined below. We believe they are interesting and
exciting, and we hope the attendees of the session will
find them stimulating and insightful.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Williams:2016:BIC,
  author =       "R. Stanley Williams",
  title =        "Brain Inspired Computing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "295--295",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872417",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Phothilimthana:2016:SS,
  author =       "Phitchaya Mangpo Phothilimthana and Aditya Thakur and
                 Rastislav Bodik and Dinakar Dhurjati",
  title =        "Scaling up Superoptimization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "297--310",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872387",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Developing a code optimizer is challenging, especially
                 for new, idiosyncratic ISAs. Superoptimization can, in
                 principle, discover machine-specific optimizations
                 automatically by searching the space of all instruction
                 sequences. If we can increase the size of code
                 fragments a superoptimizer can optimize, we will be
                 able to discover more optimizations. We develop LENS, a
                 search algorithm that increases the size of code a
                 superoptimizer can synthesize by rapidly pruning away
                 invalid candidate programs. Pruning is achieved by
                 selectively refining the abstraction under which
                 candidates are considered equivalent, only in the
                 promising part of the candidate space. LENS also uses a
                 bidirectional search strategy to prune the candidate
                 space from both forward and backward directions. These
                 pruning strategies allow LENS to solve twice as many
                 benchmarks as existing enumerative search algorithms,
                 while LENS is about 11-times faster. Additionally, we
                 increase the effective size of the superoptimized
                 fragments by relaxing the correctness condition using
                 contexts (surrounding code). Finally, we combine LENS
                 with complementary search techniques into a cooperative
                 superoptimizer, which exploits the stochastic search to
                 make random jumps in a large candidate space, and a
                 symbolic (SAT-solver-based) search to synthesize
                 arbitrary constants. While existing superoptimizers
                 consistently solve 9--16 out of 32 benchmarks, the
                 cooperative superoptimizer solves 29 benchmarks. It can
                 synthesize code fragments that are up to 82\% faster
                 than code generated by gcc -O3 from WiBench and
                 MiBench.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Hasabnis:2016:LAI,
  author =       "Niranjan Hasabnis and R. Sekar",
  title =        "Lifting Assembly to Intermediate Representation: a
                 Novel Approach Leveraging Compilers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "311--324",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872380",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Translating low-level machine instructions into
                 higher-level intermediate language (IL) is one of the
                 central steps in many binary analysis and
                 instrumentation systems. Existing systems build such
                 translators manually. As a result, it takes a great
                 deal of effort to support new architectures. Even for
                 widely deployed architectures, full instruction sets
                 may not be modeled, e.g., mature systems such as
                 Valgrind still lack support for AVX, FMA4 and SSE4.1
                 for x86 processors. To overcome these difficulties, we
                 propose a novel approach that leverages knowledge about
                 instruction set semantics that is already embedded into
                 modern compilers such as GCC. In particular, we present
                 a learning-based approach for automating the
                 translation of assembly instructions to a compiler's
                 architecture-neutral IL. We present an experimental
                 evaluation that demonstrates the ability of our
                 approach to easily support many architectures (x86, ARM
                 and AVR), including their advanced instruction sets.
                 Our implementation is available as open-source
                 software.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Muralidharan:2016:AAC,
  author =       "Saurav Muralidharan and Amit Roy and Mary Hall and
                 Michael Garland and Piyush Rai",
  title =        "Architecture-Adaptive Code Variant Tuning",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "325--338",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872411",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Code variants represent alternative implementations of
                 a computation, and are common in high-performance
                 libraries and applications to facilitate selecting the
                 most appropriate implementation for a specific
                 execution context (target architecture and input
                 dataset). Automating code variant selection typically
                 relies on machine learning to construct a model during
                 an offline learning phase that can be quickly queried
                 at runtime once the execution context is known. In this
                 paper, we define a new approach called
                 architecture-adaptive code variant tuning, where the
                 variant selection model is learned on a set of source
                 architectures, and then used to predict variants on a
                 new target architecture without having to repeat the
                 training process. We pose this as a multi-task learning
                 problem, where each source architecture corresponds to
                 a task; we use device features in the construction of
                 the variant selection model. This work explores the
                 effectiveness of multi-task learning and the impact of
                 different strategies for device feature selection. We
                 evaluate our approach on a set of benchmarks and a
                 collection of six NVIDIA GPU architectures from three
                 distinct generations. We achieve performance results
                 that are mostly comparable to the previous approach of
                 tuning for a single GPU architecture without having to
                 repeat the learning phase.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Lin:2016:SKT,
  author =       "Xiaofeng Lin and Yu Chen and Xiaodong Li and Junjie
                 Mao and Jiaquan He and Wei Xu and Yuanchun Shi",
  title =        "Scalable Kernel {TCP} Design and Implementation for
                 Short-Lived Connections",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "339--352",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872391",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "With the rapid growth of network bandwidth, increases
                 in CPU cores on a single machine, and application API
                 models demanding more short-lived connections, a
                 scalable TCP stack is performance-critical. Although
                 many clean-state designs have been proposed, production
                 environments still call for a bottom-up parallel TCP
                 stack design that is backward-compatible with existing
                 applications. We present Fastsocket, a BSD
                 Socket-compatible and scalable kernel socket design,
                 which achieves table-level connection partition in TCP
                 stack and guarantees connection locality for both
                 passive and active connections. Fastsocket architecture
                 is a ground up partition design, from NIC interrupts
                 all the way up to applications, which naturally
                 eliminates various lock contentions in the entire
                 stack. Moreover, Fastsocket maintains the full
                 functionality of the kernel TCP stack and
                 BSD-socket-compatible API, and thus applications need
                 no modifications. Our evaluations show that Fastsocket
                 achieves a speedup of 20.4x on a 24-core machine under
                 a workload of short-lived connections, outperforming
                 the state-of-the-art Linux kernel TCP implementations.
                 When scaling up to 24 CPU cores, Fastsocket increases
                 the throughput of Nginx and HAProxy by 267\% and 621\%
                 respectively compared with the base Linux kernel. We
                 also demonstrate that Fastsocket can achieve
                 scalability and preserve BSD socket API at the same
                 time. Fastsocket is already deployed in the production
                 environment of Sina WeiBo, serving 50 million daily
                 active users and billions of requests per day.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Hajj:2016:SPM,
  author =       "Izzat {El Hajj} and Alexander Merritt and Gerd
                 Zellweger and Dejan Milojicic and Reto Achermann and
                 Paolo Faraboschi and Wen-mei Hwu and Timothy Roscoe and
                 Karsten Schwan",
  title =        "{SpaceJMP}: Programming with Multiple Virtual Address
                 Spaces",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "353--368",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872366",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Memory-centric computing demands careful organization
                 of the virtual address space, but traditional methods
                 for doing so are inflexible and inefficient. If an
                 application wishes to address larger physical memory
                 than virtual address bits allow, if it wishes to
                 maintain pointer-based data structures beyond process
                 lifetimes, or if it wishes to share large amounts of
                 memory across simultaneously executing processes,
                 legacy interfaces for managing the address space are
                 cumbersome and often incur excessive overheads. We
                 propose a new operating system design that promotes
                 virtual address spaces to first-class citizens,
                 enabling process threads to attach to, detach from, and
                 switch between multiple virtual address spaces. Our
                 work enables data-centric applications to utilize vast
                 physical memory beyond the virtual range, represent
                 persistent pointer-rich data structures without special
                 pointer representations, and share large amounts of
                 memory between processes efficiently. We describe our
                 prototype implementations in the DragonFly BSD and
                 Barrelfish operating systems. We also present
                 programming semantics and a compiler transformation to
                 detect unsafe pointer usage. We demonstrate the
                 benefits of our work on data-intensive applications
                 such as the GUPS benchmark, the SAMTools genomics
                 workflow, and the Redis key-value store.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
%%% NOTE: the title sets the system name "memif" in typewriter type with
%%% the old-style font switch \tt (same family of switches as the \sc used
%%% in this file's preamble), so no extra macro definition is needed.  The
%%% outer brace pair keeps bibliography styles from recasing the name.
@Article{Lin:2016:MTP,
  author =       "Felix Xiaozhu Lin and Xu Liu",
  title =        "{{\tt memif}}: Towards Programming Heterogeneous
                 Memory Asynchronously",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "369--383",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872401",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "To harness a heterogeneous memory hierarchy, it is
                 advantageous to integrate application knowledge in
                 guiding frequent memory move, i.e., replicating or
                 migrating virtual memory regions. To this end, we
                 present memif, a protected OS service for asynchronous,
                 hardware-accelerated memory move. Compared to the state
                 of the art --- page migration in Linux, memif incurs
                 low overhead and low latency; in order to do so, it not
                 only redefines the semantics of kernel interface but
                 also overhauls the underlying mechanisms, including
                 request/completion management, race handling, and DMA
                 engine configuration. We implement memif in Linux for a
                 server-class system-on-chip that features heterogeneous
                 memories. Compared to the current Linux page migration,
                 memif reduces CPU usage by up to 15\% for small pages
                 and by up to 38x for large pages; in continuously
                 serving requests, memif has no need for request
                 batching and reduces latency by up to 63\%. By crafting
                 a small runtime atop memif, we improve the throughputs
                 for a set of streaming workloads by up to 33\%.
                 Overall, memif has opened the door to software
                 management of heterogeneous memory.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
%%% NOTE: the cache-line-flush percentage range in the abstract is written
%%% with an en-dash (--); a bare tilde would be typeset by TeX as a
%%% nonbreaking space and the range would be lost.
@Article{Kim:2016:NEN,
  author =       "Wook-Hee Kim and Jinwoong Kim and Woongki Baek and
                 Beomseok Nam and Youjip Won",
  title =        "{NVWAL}: Exploiting {NVRAM} in Write-Ahead Logging",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "385--398",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872392",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Emerging byte-addressable non-volatile memory is
                 considered an alternative storage device for database
                 logs that require persistency and high performance. In
                 this work, we develop NVWAL (NVRAM Write-Ahead Logging)
                 for SQLite. The contribution of NVWAL consists of three
                 elements: (i) byte-granularity differential logging
                 that effectively eliminates the excessive I/O overhead
                 of filesystem-based logging or journaling, (ii)
                 transaction-aware lazy synchronization that reduces
                 cache synchronization overhead by two-thirds, and (iii)
                 user-level heap management of the NVRAM persistent WAL
                 structure, which reduces the overhead of managing
                 persistent objects. We implemented NVWAL in SQLite and
                 measured the performance on a Nexus 5 smartphone and an
                 NVRAM emulation board --- Tuna. Our performance study
                 shows the following: (i) the overhead of enforcing
                 strict ordering of NVRAM writes can be reduced via
                 NVRAM-aware transaction management. (ii) From the
                 application performance point of view, the overhead of
                 guaranteeing failure atomicity is negligible; the cache
                 line flush overhead accounts for only 0.8--4.6\% of
                 transaction execution time. Therefore, application
                 performance is much less sensitive to the NVRAM
                 performance than we expected. Decreasing the NVRAM
                 latency by one-fifth (from 1942 nsec to 437 nsec),
                 SQLite achieves a mere 4\% performance gain (from 2517
                 ins/sec to 2621 ins/sec). (iii) Overall, when the write
                 latency of NVRAM is 2 usec, NVWAL increases SQLite
                 performance by at least 10x compared to that of WAL on
                 flash memory (from 541 ins/sec to 5812 ins/sec).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Kolli:2016:HPT,
  author =       "Aasheesh Kolli and Steven Pelley and Ali Saidi and
                 Peter M. Chen and Thomas F. Wenisch",
  title =        "High-Performance Transactions for Persistent
                 Memories",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "399--411",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872381",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Emerging non-volatile memory (NVRAM) technologies
                 offer the durability of disk with the
                 byte-addressability of DRAM. These devices will allow
                 software to access persistent data structures directly
                 in NVRAM using processor loads and stores, however,
                 ensuring consistency of persistent data across power
                 failures and crashes is difficult. Atomic, durable
                 transactions are a widely used abstraction to enforce
                 such consistency. Implementing transactions on NVRAM
                 requires the ability to constrain the order of NVRAM
                 writes, for example, to ensure that a transaction's log
                 record is complete before it is marked committed. Since
                 NVRAM write latencies are expected to be high,
                 minimizing these ordering constraints is critical for
                 achieving high performance. Recent work has proposed
                 programming interfaces to express NVRAM write ordering
                 constraints to hardware so that NVRAM writes may be
                 coalesced and reordered while preserving necessary
                 constraints. Unfortunately, a straightforward
                 implementation of transactions under these interfaces
                 imposes unnecessary constraints. We show how to remove
                 these dependencies through a variety of techniques,
                 notably, deferring commit until after locks are
                 released. We present a comprehensive analysis
                 contrasting two transaction designs across three NVRAM
                 programming interfaces, demonstrating up to 2.5x
                 speedup.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Guo:2016:HDI,
  author =       "Qing Guo and Karin Strauss and Luis Ceze and Henrique
                 S. Malvar",
  title =        "High-Density Image Storage Using Approximate Memory
                 Cells",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "413--426",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872413",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This paper proposes tailoring image encoding for an
                 approximate storage substrate. We demonstrate that
                 indiscriminately storing encoded images in approximate
                 memory generates unacceptable and uncontrollable
                 quality degradation. The key finding is that errors in
                 the encoded bit streams have non-uniform impact on the
                 decoded image quality. We develop a methodology to
                 determine the relative importance of encoded bits and
                 store them in an approximate storage substrate. The
                 storage cells are optimized to reduce error rate via
                 biasing and are tuned to meet the desired reliability
                 requirement via selective error correction. In a case
                 study with the progressive transform codec (PTC), a
                 precursor to JPEG XR, the proposed approximate image
                 storage system exhibits a 2.7x increase in density of
                 pixels per silicon volume under bounded error rates,
                 and this achievement is additive to the storage savings
                 of PTC compression.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Izraelevitz:2016:FAP,
  author =       "Joseph Izraelevitz and Terence Kelly and Aasheesh
                 Kolli",
  title =        "Failure-Atomic Persistent Memory Updates via {JUSTDO}
                 Logging",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "427--442",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872410",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Persistent memory invites applications to manipulate
                 persistent data via load and store instructions.
                 Because failures during updates may destroy transient
                 data (e.g., in CPU registers), preserving data
                 integrity in the presence of failures requires
                 failure-atomic bundles of updates. Prior failure
                 atomicity approaches for persistent memory entail
                 overheads due to logging and CPU cache flushing.
                 Persistent caches can eliminate the need for flushing,
                 but conventional logging remains complex and memory
                 intensive. We present the design and implementation of
                 JUSTDO logging, a new failure atomicity mechanism that
                 greatly reduces the memory footprint of logs,
                 simplifies log management, and enables fast parallel
                 recovery following failure. Crash-injection tests
                 confirm that JUSTDO logging preserves application data
                 integrity and performance evaluations show that it
                 improves throughput 3x or more compared with a
                 state-of-the-art alternative for a spectrum of
                 data-intensive algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Han:2016:IMD,
  author =       "Jaeung Han and Seungheun Jeon and Young-ri Choi and
                 Jaehyuk Huh",
  title =        "Interference Management for Distributed Parallel
                 Applications in Consolidated Clusters",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "443--456",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872388",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Consolidating multiple applications on a system can
                 improve the overall resource utilization of data center
                 systems. However, such consolidation can adversely
                 affect the performance of some applications due to
                 interference caused by resource contention. Despite
                 many prior studies on the interference effects in
                 single-node systems, the interference behaviors of
                 distributed parallel applications have not been
                 investigated thoroughly. With distributed applications,
                 a local interference in a node can affect the whole
                 execution of an application spanning many nodes. This
                 paper studies an interference modeling methodology for
                 distributed applications to predict their performance
                 under interference effects in consolidated clusters.
                 This study first characterizes the effects of
                 interference for various distributed applications over
                 different interference settings, and analyzes how
                 diverse interference intensities on multiple nodes
                 affect the overall performance. Based on the
                 characterization, this study proposes a static
                 profiling-based model for interference propagation and
                 heterogeneity behaviors. In addition, this paper
                 presents use case studies of the modeling method, two
                 interference-aware placement techniques for
                 consolidated virtual clusters, which attempt to
                 maximize the overall throughput or to guarantee the
                 quality-of-service.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Maas:2016:THL,
  author =       "Martin Maas and Krste Asanovi{\'c} and Tim Harris and
                 John Kubiatowicz",
  title =        "{Taurus}: a Holistic Language Runtime System for
                 Coordinating Distributed Managed-Language
                 Applications",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "457--471",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872386",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Many distributed workloads in today's data centers are
                 written in managed languages such as Java or Ruby.
                 Examples include big data frameworks such as Hadoop,
                 data stores such as Cassandra or applications such as
                 the SOLR search engine. These workloads typically run
                 across many independent language runtime systems on
                 different nodes. This setup represents a source of
                 inefficiency, as these language runtime systems are
                 unaware of each other. For example, they may perform
                 Garbage Collection at times that are locally reasonable
                 but not in a distributed setting. We address these
                 problems by introducing the concept of a Holistic
                 Runtime System that makes runtime-level decisions for
                 the entire distributed application rather than locally.
                 We then present Taurus, a Holistic Runtime System
                 prototype. Taurus is a JVM drop-in replacement,
                 requires almost no configuration and can run unmodified
                 off-the-shelf Java applications. Taurus enforces
                 user-defined coordination policies and provides a DSL
                 for writing these policies. By applying Taurus to
                 Garbage Collection, we demonstrate the potential of
                 such a system and use it to explore coordination
                 strategies for the runtime systems of real-world
                 distributed applications, to improve application
                 performance and address tail-latencies in
                 latency-sensitive workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Delimitrou:2016:HRE,
  author =       "Christina Delimitrou and Christos Kozyrakis",
  title =        "{HCloud}: Resource-Efficient Provisioning in Shared
                 Cloud Systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "473--488",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872365",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Cloud computing promises flexibility and high
                 performance for users and cost efficiency for
                 operators. To achieve this, cloud providers offer
                 instances of different sizes, both as long-term
                 reservations and short-term, on-demand allocations.
                 Unfortunately, determining the best provisioning
                 strategy is a complex, multi-dimensional problem that
                 depends on the load fluctuation and duration of
                 incoming jobs, and the performance unpredictability and
                 cost of resources. We first compare the two main
                 provisioning strategies (reserved and on-demand
                 resources) on Google Compute Engine (GCE) using three
                 representative workload scenarios with batch and
                 latency-critical applications. We show that either
                 approach is suboptimal for performance or cost. We then
                 present HCloud, a hybrid provisioning system that uses
                 both reserved and on-demand resources. HCloud
                 determines which jobs should be mapped to reserved
                 versus on-demand resources based on overall load, and
                 resource unpredictability. It also determines the
                 optimal instance size an application needs to satisfy
                 its Quality of Service (QoS) constraints. We
                 demonstrate that hybrid configurations improve
                 performance by 2.1x compared to fully on-demand
                 provisioning, and reduce cost by 46\% compared to fully
                 reserved systems. We also show that hybrid strategies
                 are robust to variation in system and job parameters,
                 such as cost and system load.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Yu:2016:CWM,
  author =       "Xiao Yu and Pallavi Joshi and Jianwu Xu and Guoliang
                 Jin and Hui Zhang and Guofei Jiang",
  title =        "{CloudSeer}: Workflow Monitoring of Cloud
                 Infrastructures via Interleaved Logs",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "489--502",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872407",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Cloud infrastructures provide a rich set of management
                 tasks that operate computing, storage, and networking
                 resources in the cloud. Monitoring the executions of
                 these tasks is crucial for cloud providers to promptly
                 find and understand problems that compromise cloud
                 availability. However, such monitoring is challenging
                 because there are multiple distributed service
                 components involved in the executions. CloudSeer
                 enables effective workflow monitoring. It takes a
                 lightweight non-intrusive approach that purely works on
                 interleaved logs widely existing in cloud
                 infrastructures. CloudSeer first builds an automaton
                 for the workflow of each management task based on
                 normal executions, and then it checks log messages
                 against a set of automata for workflow divergences in a
                 streaming manner. Divergences found during the checking
                 process indicate potential execution problems, which
                 may or may not be accompanied by error log messages.
                 For each potential problem, CloudSeer outputs necessary
                 context information including the affected task
                 automaton and related log messages hinting where the
                 problem occurs to help further diagnosis. Our
                 experiments on OpenStack, a popular open-source cloud
                 infrastructure, show that CloudSeer's efficiency and
                 problem-detection capability are suitable for online
                 monitoring.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Kwon:2016:LCI,
  author =       "Yonghwi Kwon and Dohyeong Kim and William Nick Sumner
                 and Kyungtae Kim and Brendan Saltaformaggio and Xiangyu
                 Zhang and Dongyan Xu",
  title =        "{LDX}: Causality Inference by Lightweight Dual
                 Execution",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "503--515",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872395",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Causality inference, such as dynamic taint analysis,
                 has many applications (e.g., information leak
                 detection). It determines whether an event e is
                 causally dependent on a preceding event c during
                 execution. We develop a new causality inference engine
                 LDX. Given an execution, it spawns a slave execution,
                 in which it mutates c and observes whether any change
                 is induced at e. To preclude non-determinism, LDX
                 couples the executions by sharing syscall outcomes. To
                 handle path differences induced by the perturbation, we
                 develop a novel on-the-fly execution alignment scheme
                 that maintains a counter to reflect the progress of
                 execution. The scheme relies on program analysis and
                 compiler transformation. LDX can effectively detect
                 information leak and security attacks with an average
                 overhead of 6.08\% while running the master and the
                 slave concurrently on separate CPUs, much lower than
                 existing systems that require instruction level
                 monitoring. Furthermore, it has much better accuracy in
                 causality inference.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Leesatapornwongsa:2016:TTN,
  author =       "Tanakorn Leesatapornwongsa and Jeffrey F. Lukman and
                 Shan Lu and Haryadi S. Gunawi",
  title =        "{TaxDC}: a Taxonomy of Non-Deterministic Concurrency
                 Bugs in Datacenter Distributed Systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "2",
  pages =        "517--530",
  month =        may,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2980024.2872374",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:42 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We present TaxDC, the largest and most comprehensive
                 taxonomy of non-deterministic concurrency bugs in
                 distributed systems. We study 104 distributed
                 concurrency (DC) bugs from four widely-deployed
                 cloud-scale datacenter distributed systems, Cassandra,
                 Hadoop MapReduce, HBase and ZooKeeper. We study DC-bug
                 characteristics along several axes of analysis such as
                 the triggering timing condition and input
                 preconditions, error and failure symptoms, and fix
                 strategies, collectively stored as 2,083 classification
                 labels in TaxDC database. We discuss how our study can
                 open up many new research directions in combating DC
                 bugs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'16 conference proceedings.",
}
@Article{Mao:2016:RFR,
author = "Junjie Mao and Yu Chen and Qixue Xiao and Yuanchun
Shi",
title = "{RID}: Finding Reference Count Bugs with Inconsistent
Path Pair Checking",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "531--544",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872389",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Reference counts are widely used in OS kernels for
resource management. However, reference counts are not
trivial to be used correctly in large scale programs
because it is left to developers to make sure that an
increment to a reference count is always paired with a
decrement. This paper proposes inconsistent path pair
checking, a novel technique that can statically
discover bugs related to reference counts without
knowing how reference counts should be changed in a
function. A prototype called RID is implemented and
evaluations show that RID can discover more than 80
bugs which were confirmed by the developers in the
latest Linux kernel. The results also show that RID
tends to reveal bugs caused by developers'
misunderstanding on API specifications or error
conditions that are not handled properly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Zhang:2016:MPU,
author = "Huazhe Zhang and Henry Hoffmann",
title = "Maximizing Performance Under a Power Cap: a Comparison
of Hardware, Software, and Hybrid Techniques",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "545--559",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872375",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Power and thermal dissipation constrain multicore
performance scaling. Modern processors are built such
that they could sustain damaging levels of power
dissipation, creating a need for systems that can
implement processor power caps. A particular challenge
is developing systems that can maximize performance
within a power cap, and approaches have been proposed
in both software and hardware. Software approaches are
flexible, allowing multiple hardware resources to be
coordinated for maximum performance, but software is
slow, requiring a long time to converge to the power
target. In contrast, hardware power capping quickly
converges to the power cap, but only manages
voltage and frequency, limiting its potential
performance. In this work we propose PUPiL, a hybrid
software/hardware power capping system. Unlike previous
approaches, PUPiL combines hardware's fast reaction
time with software's flexibility. We implement PUPiL on
real Linux/x86 platform and compare it to Intel's
commercial hardware power capping system for both
single and multi-application workloads. We find PUPiL
provides the same reaction time as Intel's hardware
with significantly higher performance. On average,
PUPiL outperforms hardware by from 1.18--2.4$ \times $ depending
on workload and power target. Thus, PUPiL provides a
promising way to enforce power caps with greater
performance than current state-of-the-art hardware-only
approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Fan:2016:CSG,
author = "Songchun Fan and Seyed Majid Zahedi and Benjamin C.
Lee",
title = "The Computational Sprinting Game",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "561--575",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872383",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Computational sprinting is a class of mechanisms that
boost performance but dissipate additional power. We
describe a sprinting architecture in which many,
independent chip multiprocessors share a power supply
and sprints are constrained by the chips' thermal
limits and the rack's power limits. Moreover, we
present the computational sprinting game, a multi-agent
perspective on managing sprints. Strategic agents
decide whether to sprint based on application phases
and system conditions. The game produces an equilibrium
that improves task throughput for data analytics
workloads by 4--6$ \times $ over prior greedy heuristics
and performs within 90\% of an upper bound on
throughput from a globally optimized policy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Colin:2016:EIF,
author = "Alexei Colin and Graham Harvey and Brandon Lucia and
Alanson P. Sample",
title = "An Energy-interference-free Hardware-Software Debugger
for Intermittent Energy-harvesting Systems",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "577--589",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872409",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Energy-autonomous computing devices have the potential
to extend the reach of computing to a scale beyond
either wired or battery-powered systems. However, these
devices pose a unique set of challenges to application
developers who lack both hardware and software support
tools. Energy harvesting devices experience power
intermittence which causes the system to reset and
power-cycle unpredictably, tens to hundreds of times
per second. This can result in code execution errors
that are not possible in continuously-powered systems
and cannot be diagnosed with conventional debugging
tools such as JTAG and/or oscilloscopes. We propose the
Energy-interference-free Debugger, a hardware and
software platform for monitoring and debugging
intermittent systems without adversely affecting their
energy state. The Energy-interference-free Debugger
re-creates a familiar debugging environment for
intermittent software and augments it with debugging
primitives for effective diagnosis of intermittence
bugs. Our evaluation of the Energy-interference-free
Debugger quantifies its energy-interference-freedom and
shows its value in a set of debugging tasks in complex
test programs and several real applications, including
RFID code and a machine-learning-based activity
recognition system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Witchel:2016:PPW,
author = "Emmett Witchel",
title = "Programmer Productivity in a World of Mushy
Interfaces: Challenges of the Post-{ISA} Reality",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "591--591",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2876511",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Since 1964, we had the notion that the instruction set
architecture (ISA) is a useful and fairly opaque
abstraction layer between hardware and software.
Software rode hardware's performance wave while
remaining gloriously oblivious to hardware's growing
complexity. Unfortunately, the jig is up. We still have
ISAs, but the abstraction no longer offers seamless
portability---parallel software needs to be tuned for
different core counts, and heterogeneous processing
elements (CPUs, GPUs, accelerators) further complicate
programmability. We are better at building large-scale
heterogeneous processors than we are at programming
them. Maintaining software across multiple current
platforms is difficult and porting to future platforms
is also difficult. There have been many technical
responses: virtual ISAs (e.g., NVIDIA's PTX),
higher-level programming interfaces (e.g., CUDA or
OpenCL), and late-stage compilation and
platform-specific tailoring (e.g., Android ART), etc. A
team of opinionated experts, drawn from the three
ASPLOS communities will examine the problem of
programmer productivity in the post-ISA world, first
from the perspective of their area of expertise and
then noting the contributions from the other two
communities. What research will save us and how? This
wide-ranging debate will frame important research areas
for future work while being grounded in frank
discussion about what has succeeded in the past.
Attendees can expect actionable insight into important
research issues as well as an entertaining discussion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Angstadt:2016:RPP,
author = "Kevin Angstadt and Westley Weimer and Kevin Skadron",
title = "{RAPID} Programming of Pattern-Recognition
Processors",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "593--605",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872393",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We present RAPID, a high-level programming language
and combined imperative and declarative model for
programming pattern-recognition processors, such as
Micron's Automata Processor (AP). The AP is a novel,
non-Von Neumann architecture for direct execution of
non-deterministic finite automata (NFAs), and has been
demonstrated to provide substantial speedup for a
variety of data-processing applications. RAPID is
clear, maintainable, concise, and efficient both at
compile and run time. Language features, such as code
abstraction and parallel control structures, map well
to pattern-matching problems, providing clarity and
maintainability. For generation of efficient runtime
code, we present algorithms to convert RAPID programs
into finite automata. Further, we introduce a
tessellation technique for configuring the AP, which
significantly reduces compile time, increases
programmer productivity, and improves maintainability.
We evaluate five RAPID programs against custom,
baseline implementations previously demonstrated to be
significantly accelerated by the AP. We find that RAPID
programs are much shorter in length, are expressible at
a higher level of abstraction than their handcrafted
counterparts, and yield generated code that is often
more compact. In addition, our tessellation technique
for configuring the AP has comparable device
utilization to, and results in compilation that is up
to four orders of magnitude faster than, current
solutions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Sui:2016:PCA,
author = "Xin Sui and Andrew Lenharth and Donald S. Fussell and
Keshav Pingali",
title = "Proactive Control of Approximate Programs",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "607--621",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872402",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Approximate computing trades off accuracy of results
for resources such as energy or computing time. There
is a large and rapidly growing literature on
approximate computing that has focused mostly on
showing the benefits of approximate computing. However,
we know relatively little about how to control
approximation in a disciplined way. In this paper, we
address the problem of controlling approximation for
non-streaming programs that have a set of ``knobs''
that can be dialed up or down to control the level of
approximation of different components in the program.
We formulate this control problem as a constrained
optimization problem, and describe a system called
Capri that uses machine learning to learn cost and
error models for the program, and uses these models to
determine, for a desired level of approximation, knob
settings that optimize metrics such as running time or
energy usage. Experimental results with complex
benchmarks from different problem domains demonstrate
the effectiveness of this approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Park:2016:ATC,
author = "Jongse Park and Emmanuel Amaro and Divya Mahajan and
Bradley Thwaites and Hadi Esmaeilzadeh",
title = "{AxGames}: Towards Crowdsourcing Quality Target
Determination in Approximate Computing",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "623--636",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872376",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Approximate computing trades quality of application
output for higher efficiency and performance.
Approximation is useful only if its impact on
application output quality is acceptable to the users.
However, there is a lack of systematic solutions and
studies that explore users' perspective on the effects
of approximation. In this paper, we seek to provide one
such solution for the developers to probe and discover
the boundary of quality loss that most users will deem
acceptable. We propose AxGames, a crowdsourced solution
that enables developers to readily infer a statistical
common ground from the general public through three
entertaining games. The users engage in these games by
betting on their opinion about the quality loss of the
final output while the AxGames framework collects
statistics about their perceptions. The framework then
statistically analyzes the results to determine the
acceptable levels of quality for a pair of
(application, approximation technique). The three games
are designed such that they effectively capture quality
requirements with various tradeoffs and contexts. To
evaluate AxGames, we examine seven diverse applications
that produce user perceptible outputs and cover a wide
range of domains, including image processing, optical
character recognition, speech to text conversion, and
audio processing. We recruit 700 participants/users
through Amazon's Mechanical Turk to play the games that
collect statistics about their perception on different
levels of quality. Subsequently, the AxGames framework
uses the Clopper-Pearson exact method, which computes a
binomial proportion confidence interval, to analyze the
collected statistics for each level of quality. Using
this analysis, AxGames can statistically project the
quality level that satisfies a given percentage of
users. The developers can use these statistical
projections to tune the level of approximation based on
the user experience. We find that the level of
acceptable quality loss significantly varies across
applications. For instance, to satisfy 90\% of users,
the level of acceptable quality loss is 2\% for one
application (image processing) and 26\% for another
(audio processing). Moreover, the pattern with which
the crowd responds to approximation takes significantly
different shape and form depending on the class of
applications. These results confirm the necessity of
solutions that systematically explore the effect of
approximation on the end user experience.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Bornholt:2016:DBA,
author = "James Bornholt and Randolph Lopez and Douglas M.
Carmean and Luis Ceze and Georg Seelig and Karin
Strauss",
title = "A {DNA}-Based Archival Storage System",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "637--649",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872397",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Demand for data storage is growing exponentially, but
the capacity of existing storage media is not keeping
up. Using DNA to archive data is an attractive
possibility because it is extremely dense, with a raw
limit of 1 exabyte/mm$^3$ ($10^9$ GB/mm$^3$), and
long-lasting, with observed half-life of over 500
years. This paper presents an architecture for a
DNA-based archival storage system. It is structured as
a key-value store, and leverages common biochemical
techniques to provide random access. We also propose a
new encoding scheme that offers controllable
redundancy, trading off reliability for density. We
demonstrate feasibility, random access, and robustness
of the proposed encoding with wet lab experiments
involving 151 kB of synthesized DNA and a 42 kB
random-access subset, and simulation experiments of
larger sets calibrated to the wet lab experiments.
Finally, we highlight trends in biotechnology that
indicate the impending practicality of DNA storage for
much larger datasets.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Prabhakar:2016:GCH,
author = "Raghu Prabhakar and David Koeplinger and Kevin J.
Brown and HyoukJoong Lee and Christopher {De Sa} and
Christos Kozyrakis and Kunle Olukotun",
title = "Generating Configurable Hardware from Parallel
Patterns",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "651--665",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872415",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In recent years the computing landscape has seen an
increasing shift towards specialized accelerators.
Field programmable gate arrays (FPGAs) are particularly
promising for the implementation of these accelerators,
as they offer significant performance and energy
improvements over CPUs for a wide class of applications
and are far more flexible than fixed-function ASICs.
However, FPGAs are difficult to program. Traditional
programming models for reconfigurable logic use
low-level hardware description languages like Verilog
and VHDL, which have none of the productivity features
of modern software languages but produce very efficient
designs, and low-level software languages like C and
OpenCL coupled with high-level synthesis (HLS) tools
that typically produce designs that are far less
efficient. Functional languages with parallel patterns
are a better fit for hardware generation because they
provide high-level abstractions to programmers with
little experience in hardware design and avoid many of
the problems faced when generating hardware from
imperative languages. In this paper, we identify two
important optimizations for using parallel patterns to
generate efficient hardware: tiling and metapipelining.
We present a general representation of tiled parallel
patterns, and provide rules for automatically tiling
patterns and generating metapipelines. We demonstrate
experimentally that these optimizations result in
speedups up to 39.4$ \times $ on a set of benchmarks
from the data analytics domain.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Chang:2016:DLD,
author = "Li-Wen Chang and Hee-Seok Kim and Wen-mei W. Hwu",
title = "{DySel}: Lightweight Dynamic Selection for
Kernel-based Data-parallel Programming Model",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "667--680",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872373",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The rising pressure for simultaneously improving
performance and reducing power is driving more
diversity into all aspects of computing devices. An
algorithm that is well-matched to the target hardware
can run multiple times faster and more energy
efficiently than one that is not. The problem is
complicated by the fact that a program's input also
affects the appropriate choice of algorithm. As a
result, software developers have been faced with the
challenge of determining the appropriate algorithm for
each potential combination of target device and data.
This paper presents DySel, a novel runtime system for
automating such determination for kernel-based data
parallel programming models such as OpenCL, CUDA,
OpenACC, and C++AMP. These programming models cover
many applications that demand high performance in
mobile, cloud and high-performance computing. DySel
systematically deploys candidate kernels on a small
portion of the actual data to determine which achieves
the best performance for the hardware-data combination.
The test-deployment, referred to as micro-profiling,
contributes to the final execution result and incurs
less than 8\% of overhead in the worst observed case
when compared to an oracle. We show four major use
cases where DySel provides significantly more
consistent performance without tedious effort from the
developer.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Chen:2016:BQA,
author = "Quan Chen and Hailong Yang and Jason Mars and Lingjia
Tang",
title = "{Baymax}: {QoS} Awareness and Increased Utilization
for Non-Preemptive Accelerators in Warehouse Scale
Computers",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "681--696",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872368",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern warehouse-scale computers (WSCs) are being
outfitted with accelerators to provide the significant
compute required by emerging intelligent personal
assistant (IPA) workloads such as voice recognition,
image classification, and natural language processing.
It is well known that the diurnal user access pattern
of user-facing services provides a strong incentive to
co-locate applications for better accelerator
utilization and efficiency, and prior work has focused
on enabling co-location on multicore processors.
However, interference when co-locating applications on
non-preemptive accelerators is fundamentally different
than contention on multi-core CPUs and introduces a new
set of challenges to reduce QoS violation. To address
this open problem, we first identify the underlying
causes for QoS violation in accelerator-outfitted
servers. Our experiments show that queuing delay for
the compute resources and PCI-e bandwidth contention
for data transfer are the main two factors that
contribute to the long tails of user-facing
applications. We then present Baymax, a runtime system
that orchestrates the execution of compute tasks from
different applications and mitigates PCI-e bandwidth
contention to deliver the required QoS for user-facing
applications and increase the accelerator utilization.
Using DjiNN, a deep neural network service, Sirius, an
end-to-end IPA workload, and traditional applications
on a Nvidia K40 GPU, our evaluation shows that Baymax
improves the accelerator utilization by 91.3\% while
achieving the desired 99\%-ile latency target for
user-facing applications. In fact, Baymax reduces the
99\%-ile latency of user-facing applications by up to
195x over default execution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Nowatzki:2016:ABS,
author = "Tony Nowatzki and Karthikeyan Sankaralingam",
title = "Analyzing Behavior Specialized Acceleration",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "697--711",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872412",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Hardware specialization has become a promising
paradigm for overcoming the inefficiencies of general
purpose microprocessors. Of significant interest are
Behavioral Specialized Accelerators (BSAs), which are
designed to efficiently execute code with only certain
properties, but remain largely configurable or
programmable. The most important strength of BSAs ---
their ability to target a wide variety of codes ---
also makes their interactions and analysis complex,
raising the following questions: can multiple BSAs be
composed synergistically, what are their interactions
with the general purpose core, and what combinations
favor which workloads? From a methodological
standpoint, BSAs are also challenging, as they each
require ISA development, compiler and assembler
extensions, and either simulator or RTL models. To
study the potential of BSAs, we propose a novel
modeling technique called the Transformable Dependence
Graph (TDG) --- a higher level alternative to the
time-consuming traditional compiler+simulator approach,
while still enabling detailed microarchitectural models
for both general cores and accelerators. We then
propose a multi-BSA organization, called ExoCore, which
we model and study using the TDG. A design space
exploration reveals that an ExoCore organization can
push designs beyond the established energy-performance
frontiers for general purpose cores. For example, a
2-wide OOO processor with three BSAs matches the
performance of a conventional 6-wide OOO core, has 40\%
lower area, and is 2.6x more energy efficient.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Yoon:2016:PPI,
author = "Man-Ki Yoon and Negin Salajegheh and Yin Chen and
Mihai Christodorescu",
title = "{PIFT}: Predictive Information-Flow Tracking",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "713--725",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872403",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Phones today carry sensitive information and have a
great number of ways to communicate that data. As a
result, malware that steal money, information, or
simply disable functionality have hit the app stores.
Current security solutions for preventing undesirable
data leaks are mostly high-overhead and have not been
practical enough for smartphones. In this paper, we
show that, by simply monitoring just some instructions
(only memory loads and stores), it is possible to
achieve low overhead, highly accurate information flow
tracking. Our method achieves 98\% accuracy (0\% false
positive and 2\% false negative) over DroidBench and
was able to successfully catch seven real-world malware
instances that steal phone number, location, and device
ID using SMS messages and HTTP connections.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Venkat:2016:HHI,
author = "Ashish Venkat and Sriskanda Shamasunder and Hovav
Shacham and Dean M. Tullsen",
title = "{HIPStR}: Heterogeneous-{ISA} Program State
Relocation",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "727--741",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872408",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Heterogeneous Chip Multiprocessors have been shown to
provide significant performance and energy efficiency
gains over homogeneous designs. Recent research has
expanded the dimensions of heterogeneity to include
diverse Instruction Set Architectures, called
Heterogeneous-ISA Chip Multiprocessors. This work
leverages such an architecture to realize substantial
new security benefits, and in particular, to thwart
Return-Oriented Programming. This paper proposes a
novel security defense called HIPStR ---
Heterogeneous-ISA Program State Relocation --- that
performs dynamic randomization of run-time program
state, both within and across ISAs. This technique
outperforms the state-of-the-art just-in-time code
reuse (JIT-ROP) defense by an average of 15.6\%, while
simultaneously providing greater security guarantees
against classic return-into-libc, ROP, JOP, brute
force, JIT-ROP, and several evasive variants.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Aweke:2016:ASB,
author = "Zelalem Birhanu Aweke and Salessawi Ferede Yitbarek
and Rui Qiao and Reetuparna Das and Matthew Hicks and
Yossi Oren and Todd Austin",
title = "{ANVIL}: Software-Based Protection Against
Next-Generation Rowhammer Attacks",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "743--755",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872390",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Ensuring the integrity and security of the memory
system is critical. Recent studies have shown serious
security concerns due to ``rowhammer'' attacks, where
repeated accesses to a row of memory cause bit flips in
adjacent rows. Recent work by Google's Project Zero has
shown how to leverage rowhammer-induced bit-flips as
the basis for security exploits that include malicious
code injection and memory privilege escalation. Being
an important security concern, industry has attempted
to defend against rowhammer attacks. Deployed defenses
employ two strategies: (1) doubling the system DRAM
refresh rate and (2) restricting access to the CLFLUSH
instruction that attackers use to bypass the cache to
increase memory access frequency (i.e., the rate of
rowhammering). We demonstrate that such defenses are
inadequate: we implement rowhammer attacks that both
avoid using the CLFLUSH instruction and cause bit flips
with a doubled refresh rate. Our next-generation
CLFLUSH-free rowhammer attack bypasses the cache by
manipulating cache replacement state to allow frequent
misses out of the last-level cache to DRAM rows of our
choosing. To protect existing systems from more
advanced rowhammer attacks, we develop a software-based
defense, ANVIL, which thwarts all known rowhammer
attacks on existing systems. ANVIL detects rowhammer
attacks by tracking the locality of DRAM accesses using
existing hardware performance counters. Our detector
identifies the rows being frequently accessed (i.e.,
the aggressors), then selectively refreshes the nearby
victim rows to prevent hammering. Experiments running
on real hardware with the SPEC2006 benchmarks show that
ANVIL has less than a 1\% false positive rate and an
average slowdown of 1\%. ANVIL is low-cost and robust,
and our experiments indicate that it is an effective
approach for protecting existing and future systems
from even advanced rowhammer attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Didona:2016:PAM,
author = "Diego Didona and Nuno Diegues and Anne-Marie Kermarrec
and Rachid Guerraoui and Ricardo Neves and Paolo
Romano",
title = "{ProteusTM}: Abstraction Meets Performance in
Transactional Memory",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "757--771",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872385",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The Transactional Memory (TM) paradigm promises to
greatly simplify the development of concurrent
applications. This led, over the years, to the creation
of a plethora of TM implementations delivering wide
ranges of performance across workloads. Yet, no
universal implementation fits each and every workload.
In fact, the best TM in a given workload can reveal to
be disastrous for another one. This forces developers
to face the complex task of tuning TM implementations,
which significantly hampers their wide adoption. In
this paper, we address the challenge of automatically
identifying the best TM implementation for a given
workload. Our proposed system, ProteusTM, hides behind
the TM interface a large library of implementations.
Underneath, it leverages a novel multi-dimensional
online optimization scheme, combining two popular
learning techniques: Collaborative Filtering and
Bayesian Optimization. We integrated ProteusTM in GCC
and demonstrate its ability to switch between TMs and
adapt several configuration parameters (e.g., number of
threads). We extensively evaluated ProteusTM, obtaining
average performance {$<$3}\% from optimal, and gains up
to 100x over static alternatives.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Shalev:2016:CCS,
author = "Noam Shalev and Eran Harpaz and Hagar Porat and Idit
Keidar and Yaron Weinsberg",
title = "{CSR}: Core Surprise Removal in Commodity Operating
Systems",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "773--787",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872369",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "One of the adverse effects of shrinking transistor
sizes is that processors have become increasingly prone
to hardware faults. At the same time, the number of
cores per die rises. Consequently, core failures can no
longer be ruled out, and future operating systems for
many-core machines will have to incorporate fault
tolerance mechanisms. We present CSR, a strategy for
recovery from unexpected permanent processor faults in
commodity operating systems. Our approach overcomes
surprise removal of faulty cores, and also tolerates
cascading core failures. When a core fails in user
mode, CSR terminates the process executing on that core
and migrates the remaining processes in its run-queue
to other cores. We further show how hardware
transactional memory may be used to overcome failures
in critical kernel code. Our solution is scalable,
incurs low overhead, and is designed to integrate into
modern operating systems. We have implemented it in the
Linux kernel, using Haswell's Transactional
Synchronization Extension, and tested it on a real
system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Gangwani:2016:CBS,
author = "Tanmay Gangwani and Adam Morrison and Josep
Torrellas",
title = "{CASPAR}: Breaking Serialization in Lock-Free
Multicore Synchronization",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "2",
pages = "789--804",
month = may,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2980024.2872400",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:42 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In multicores, performance-critical synchronization is
increasingly performed in a lock-free manner using
atomic instructions such as CAS or LL/SC. However, when
many processors synchronize on the same variable,
performance can still degrade significantly. Contending
writes get serialized, creating a non-scalable
condition. Past proposals that build hardware queues of
synchronizing processors do not fundamentally solve
this problem---at best, they help to efficiently
serialize the contending writes. This paper proposes a
novel architecture that breaks the serialization of
hardware queues and enables the queued processors to
perform lock-free synchronization in parallel. The
architecture, called CASPAR, is able to (1) execute the
CASes in the queued-up processors in parallel through
eager forwarding of expected values, and (2) validate
the CASes in parallel and dequeue groups of processors
at a time. The result is highly-scalable
synchronization. We evaluate CASPAR with simulations of
a 64-core chip. Compared to existing proposals with
hardware queues, CASPAR improves the throughput of
kernels by 32\% on average, and reduces the execution
time of the sections considered in lock-free versions
of applications by 47\% on average. This makes these
sections 2.5x faster than in the original
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'16 conference proceedings.",
}
@Article{Albericio:2016:CIN,
author = "Jorge Albericio and Patrick Judd and Tayler
Hetherington and Tor Aamodt and Natalie Enright Jerger
and Andreas Moshovos",
title = "{Cnvlutin}: ineffectual-neuron-free deep neural
network computing",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "1--13",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001138",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This work observes that a large fraction of the
computations performed by Deep Neural Networks (DNNs)
are intrinsically ineffectual as they involve a
multiplication where one of the inputs is zero. This
observation motivates Cnvlutin (CNV), a value-based
approach to hardware acceleration that eliminates most
of these ineffectual operations, improving performance
and energy over a state-of-the-art accelerator with no
accuracy loss. CNV uses hierarchical data-parallel
units, allowing groups of lanes to proceed mostly
independently enabling them to skip over the
ineffectual computations. A co-designed data storage
format encodes the computation elimination decisions
taking them off the critical path while avoiding
control divergence in the data parallel units.
Combined, the units and the data storage format result
in a data-parallel architecture that maintains wide,
aligned accesses to its memory hierarchy and that keeps
its data lanes busy. By loosening the ineffectual
computation identification criterion, CNV enables
further performance and energy efficiency improvements,
and more so if a loss in accuracy is acceptable.
Experimental measurements over a set of
state-of-the-art DNNs for image classification show
that CNV improves performance over a state-of-the-art
accelerator from 1.24$ \times $ to 1.55$ \times $ and
by 1.37$ \times $ on average without any loss in
accuracy by removing zero-valued operand
multiplications alone. While CNV incurs an area
overhead of 4.49\%, it improves overall EDP (Energy
Delay Product) and ED$^2$P (Energy Delay Squared
Product) on average by 1.47$ \times $ and 2.01$ \times
$, respectively. The average performance improvements
increase to 1.52$ \times $ without any loss in accuracy
with a broader ineffectual identification policy.
Further improvements are demonstrated with a loss in
accuracy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Shafiee:2016:ICN,
author = "Ali Shafiee and Anirban Nag and Naveen Muralimanohar
and Rajeev Balasubramonian and John Paul Strachan and
Miao Hu and R. Stanley Williams and Vivek Srikumar",
title = "{ISAAC}: a convolutional neural network accelerator
with in-situ analog arithmetic in crossbars",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "14--26",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001139",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "A number of recent efforts have attempted to design
accelerators for popular machine learning algorithms,
such as those involving convolutional and deep neural
networks (CNNs and DNNs). These algorithms typically
involve a large number of multiply-accumulate
(dot-product) operations. A recent project, DaDianNao,
adopts a near data processing approach, where a
specialized neural functional unit performs all the
digital arithmetic operations and receives input
weights from adjacent eDRAM banks. This work explores
an in-situ processing approach, where memristor
crossbar arrays not only store input weights, but are
also used to perform dot-product operations in an
analog manner. While the use of crossbar memory as an
analog dot-product engine is well known, no prior work
has designed or characterized a full-fledged
accelerator based on crossbars. In particular, our work
makes the following contributions: (i) We design a
pipelined architecture, with some crossbars dedicated
for each neural network layer, and eDRAM buffers that
aggregate data between pipeline stages. (ii) We define
new data encoding techniques that are amenable to
analog computations and that can reduce the high
overheads of analog-to-digital conversion (ADC). (iii)
We define the many supporting digital components
required in an analog CNN accelerator and carry out a
design space exploration to identify the best balance
of memristor storage/compute, ADCs, and eDRAM storage
on a chip. On a suite of CNN and DNN workloads, the
proposed ISAAC architecture yields improvements of
14.8$ \times $, 5.5$ \times $, and 7.5$ \times $ in
throughput, energy, and computational density
(respectively), relative to the state-of-the-art
DaDianNao architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Chi:2016:PNP,
author = "Ping Chi and Shuangchen Li and Cong Xu and Tao Zhang
and Jishen Zhao and Yongpan Liu and Yu Wang and Yuan
Xie",
title = "{PRIME}: a novel processing-in-memory architecture for
neural network computation in {ReRAM}-based main
memory",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "27--39",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001140",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Processing-in-memory (PIM) is a promising solution to
address the ``memory wall'' challenges for future
computer systems. Prior proposed PIM architectures put
additional computation logic in or near memory. The
emerging metal-oxide resistive random access memory
(ReRAM) has showed its potential to be used for main
memory. Moreover, with its crossbar array structure,
ReRAM can perform matrix-vector multiplication
efficiently, and has been widely studied to accelerate
neural network (NN) applications. In this work, we
propose a novel PIM architecture, called PRIME, to
accelerate NN applications in ReRAM based main memory.
In PRIME, a portion of ReRAM crossbar arrays can be
configured as accelerators for NN applications or as
normal memory for a larger memory space. We provide
microarchitecture and circuit designs to enable the
morphable functions with an insignificant area
overhead. We also design a software/hardware interface
for software developers to implement various NNs on
PRIME. Benefiting from both the PIM architecture and
the efficiency of using ReRAM for NN computation, PRIME
distinguishes itself from prior work on NN
acceleration, with significant performance improvement
and energy saving. Our experimental results show that,
compared with a state-of-the-art neural processing unit
design, PRIME improves the performance by $ \sim $2360$ \times
$ and the energy consumption by $ \sim $895$ \times $, across
the evaluated machine learning benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Torng:2016:AAW,
author = "Christopher Torng and Moyang Wang and Christopher
Batten",
title = "Asymmetry-aware work-stealing runtimes",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "40--52",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001142",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Amdahl's law provides architects a compelling reason
to introduce system asymmetry to optimize for both
serial and parallel regions of execution. Asymmetry in
a multicore processor can arise statically (e.g., from
core microarchitecture) or dynamically (e.g., applying
dynamic voltage/frequency scaling). Work stealing is an
increasingly popular approach to task distribution that
elegantly balances task-based parallelism across
multiple worker threads. In this paper, we propose
asymmetry-aware work-stealing (AAWS) runtimes, which
are carefully designed to exploit both the static and
dynamic asymmetry in modern systems. AAWS runtimes use
three key hardware/software techniques: work-pacing,
work-sprinting, and work-mugging. Work-pacing and
work-sprinting are novel techniques that combine a
marginal-utility-based approach with integrated voltage
regulators to improve performance and energy efficiency
in high- and low-parallel regions. Work-mugging is a
previously proposed technique that enables a waiting
big core to preemptively migrate work from a busy
little core. We propose a simple implementation of
work-mugging based on lightweight user-level
interrupts. We use a vertically integrated research
methodology spanning software, architecture, and VLSI
to make the case that holistically combining static
asymmetry, dynamic asymmetry, and work-stealing
runtimes can improve both performance and energy
efficiency in future multicore systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Tseng:2016:MCA,
author = "Hung-Wei Tseng and Qianchen Zhao and Yuxiao Zhou and
Mark Gahagan and Steven Swanson",
title = "{Morpheus}: creating application objects efficiently
for heterogeneous computing",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "53--65",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001143",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In high performance computing systems, object
deserialization can become a surprisingly important
bottleneck---in our test, a set of general-purpose,
highly parallelized applications spends 64\% of total
execution time deserializing data into objects. This
paper presents the Morpheus model, which allows
applications to move such computations to a storage
device. We use this model to deserialize data into
application objects inside storage devices, rather than
in the host CPU. Using the Morpheus model for object
deserialization avoids unnecessary system overheads,
frees up scarce CPU and main memory resources for
compute-intensive workloads, saves I/O bandwidth, and
reduces power consumption. In heterogeneous,
co-processor-equipped systems, Morpheus allows
application objects to be sent directly from a storage
device to a co-processor (e.g., a GPU) by peer-to-peer
transfer, further improving application performance as
well as reducing the CPU and main memory utilizations.
This paper implements Morpheus-SSD, an SSD supporting
the Morpheus model. Morpheus-SSD improves the
performance of object deserialization by 1.66$ \times
$, reduces power consumption by 7\%, uses 42\% less
energy, and speeds up the total execution time by 1.32$
\times $. By using NVMe-P2P that realizes peer-to-peer
communication between Morpheus-SSD and a GPU,
Morpheus-SSD can speed up the total execution time by
1.39$ \times $ in a heterogeneous computing platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Mahajan:2016:TSG,
author = "Divya Mahajan and Amir Yazdanbakhsh and Jongse Park
and Bradley Thwaites and Hadi Esmaeilzadeh",
title = "Towards statistical guarantees in controlling quality
tradeoffs for approximate acceleration",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "66--77",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001144",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Conventionally, an approximate accelerator replaces
every invocation of a frequently executed region of
code without considering the final quality degradation.
However, there is a vast decision space in which each
invocation can either be delegated to the
accelerator---improving performance and efficiency---or
run on the precise core---maintaining quality. In this
paper we introduce Mithra, a co-designed
hardware-software solution, that navigates these
tradeoffs to deliver high performance and efficiency
while lowering the final quality loss. Mithra seeks to
identify whether each individual accelerator invocation
will lead to an undesirable quality loss and, if so,
directs the processor to run the original precise code.
This identification is cast as a binary classification
task that requires a cohesive co-design of hardware and
software. The hardware component performs the
classification at runtime and exposes a knob to the
software mechanism to control quality tradeoffs. The
software tunes this knob by solving a statistical
optimization problem that maximizes benefits from
approximation while providing statistical guarantees
that final quality level will be met with high
confidence. The software uses this knob to tune and
train the hardware classifiers. We devise two distinct
hardware classifiers, one table-based and one neural
network based. To understand the efficacy of these
mechanisms, we compare them with an ideal, but
infeasible design, the oracle. Results show that, with
95\% confidence the table-based design can restrict the
final output quality loss to 5\% for 90\% of unseen
input sets while providing 2.5$ \times $ speedup and
2.6$ \times $ energy efficiency. The neural design
shows similar speedup however, improves the efficiency
by 13\%. Compared to the table-based design, the oracle
improves speedup by 26\% and efficiency by 36\%. These
results show that Mithra performs within a close range
of the oracle and can effectively navigate the quality
tradeoffs in approximate acceleration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Jain:2016:BFL,
author = "Akanksha Jain and Calvin Lin",
title = "Back to the future: leveraging {Belady}'s algorithm
for improved cache replacement",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "78--89",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001146",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Belady's algorithm is optimal but infeasible because
it requires knowledge of the future. This paper
explains how a cache replacement algorithm can
nonetheless learn from Belady's algorithm by applying
it to past cache accesses to inform future cache
replacement decisions. We show that the implementation
is surprisingly efficient, as we introduce a new method
of efficiently simulating Belady's behavior, and we use
known sampling techniques to compactly represent the
long history information that is needed for high
accuracy. For a 2MB LLC, our solution uses a 16KB
hardware budget (excluding replacement state in the tag
array). When applied to a memory-intensive subset of
the SPEC 2006 CPU benchmarks, our solution improves
performance over LRU by 8.4\%, as opposed to 6.2\% for
the previous state-of-the-art. For a 4-core system with
a shared 8MB LLC, our solution improves performance by
15.0\%, compared to 12.0\% for the previous
state-of-the-art.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Park:2016:ESFa,
author = "Chang Hyun Park and Taekyung Heo and Jaehyuk Huh",
title = "Efficient synonym filtering and scalable delayed
translation for hybrid virtual caching",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "90--102",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001147",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Conventional translation look-aside buffers (TLBs) are
required to complete address translation with short
latencies, as the address translation is on the
critical path of all memory accesses even for L1 cache
hits. Such strict TLB latency restrictions limit the
TLB capacity, as the latency increase with large TLBs
may lower the overall performance even with potential
TLB miss reductions. Furthermore, TLBs consume a
significant amount of energy as they are accessed for
every instruction fetch and data access. To avoid the
latency restriction and reduce the energy consumption,
virtual caching techniques have been proposed to defer
translation to after L1 cache misses. However, an
efficient solution for the synonym problem has been a
critical issue hindering the wide adoption of virtual
caching. Based on the virtual caching concept, this
study proposes a hybrid virtual memory architecture
extending virtual caching to the entire cache
hierarchy, aiming to improve both performance and
energy consumption. The hybrid virtual caching uses
virtual addresses augmented with address space
identifiers (ASID) in the cache hierarchy for common
non-synonym addresses. For such non-synonyms, the
address translation occurs only after last-level cache
(LLC) misses. For uncommon synonym addresses, the
addresses are translated to physical addresses with
conventional TLBs before L1 cache accesses. To support
such hybrid translation, we propose an efficient
synonym detection mechanism based on Bloom filters
which can identify synonym candidates with few false
positives. For large memory applications, delayed
translation alone cannot solve the address translation
problem, as fixed-granularity delayed TLBs may not
scale with the increasing memory requirements. To
mitigate the translation scalability problem, this
study proposes a delayed many segment translation
designed for the hybrid virtual caching. The
experimental results show that our approach effectively
lowers accesses to the TLBs, leading to significant
power savings. In addition, the approach provides
performance improvement with scalable delayed
translation with variable length segments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Cheng:2016:LLB,
author = "Hsiang-Yun Cheng and Jishen Zhao and Jack Sampson and
Mary Jane Irwin and Aamer Jaleel and Yu Lu and Yuan
Xie",
title = "{LAP}: loop-block aware inclusion properties for
energy-efficient asymmetric last level caches",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "103--114",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001148",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging non-volatile memory (NVM) technologies, such
as spin-transfer torque RAM (STT-RAM), are attractive
options for replacing or augmenting SRAM in
implementing last-level caches (LLCs). However, the
asymmetric read/write energy and latency associated
with NVM introduces new challenges in designing caches
where, in contrast to SRAM, dynamic energy from write
operations can be responsible for a larger fraction of
total cache energy than leakage. These properties lead
to the fact that no single traditional inclusion policy
being dominant in terms of LLC energy consumption for
asymmetric LLCs. We propose a novel selective inclusion
policy, Loop-block-Aware Policy (LAP), to reduce
energy consumption in LLCs with asymmetric read/write
properties. In order to eliminate redundant writes to
the LLC, LAP incorporates advantages from both
non-inclusive and exclusive designs to selectively
cache only part of upper-level data in the LLC. Results
show that LAP outperforms other variants of selective
inclusion policies and consumes 20\% and 12\% less
energy than non-inclusive and exclusive STT-RAM-based
LLCs, respectively. We extend LAP to a system with
SRAM/STT-RAM hybrid LLCs to achieve energy-efficient
data placement, reducing the energy consumption by 22\%
and 15\% over non-inclusion and exclusion on average,
with average-case performance improvements, small
worst-case performance loss, and minimal hardware
overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Koeplinger:2016:AGE,
author = "David Koeplinger and Christina Delimitrou and Raghu
Prabhakar and Christos Kozyrakis and Yaqi Zhang and
Kunle Olukotun",
title = "Automatic generation of efficient accelerators for
reconfigurable hardware",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "115--127",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001150",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Acceleration in the form of customized datapaths offer
large performance and energy improvements over general
purpose processors. Reconfigurable fabrics such as
FPGAs are gaining popularity for use in implementing
application-specific accelerators, thereby increasing
the importance of having good high-level FPGA design
tools. However, current tools for targeting FPGAs offer
inadequate support for high-level programming, resource
estimation, and rapid and automatic design space
exploration. We describe a design framework that
addresses these challenges. We introduce a new
representation of hardware using parameterized
templates that captures locality and parallelism
information at multiple levels of nesting. This
representation is designed to be automatically
generated from high-level languages based on parallel
patterns. We describe a hybrid area estimation
technique which uses template-level models and
design-level artificial neural networks to account for
effects from hardware place-and-route tools, including
routing overheads, register and block RAM duplication,
and LUT packing. Our runtime estimation accounts for
off-chip memory accesses. We use our estimation
capabilities to rapidly explore a large space of
designs across tile sizes, parallelization factors, and
optional coarse-grained pipelining, all at multiple
loop levels. We show that estimates average 4.8\% error
for logic resources, 6.1\% error for runtimes, and are
279 to 6533 times faster than a commercial high-level
synthesis tool. We compare the best-performing designs
to optimized CPU code running on a server-grade 6 core
processor and show speedups of up to 16.7$ \times $.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:SFA,
author = "Donggyu Kim and Adam Izraelevitz and Christopher Celio
and Hokeun Kim and Brian Zimmer and Yunsup Lee and
Jonathan Bachrach and Krste Asanovi{\'c}",
title = "{Strober}: fast and accurate sample-based energy
simulation for arbitrary {RTL}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "128--139",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001151",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a sample-based energy simulation
methodology that enables fast and accurate estimations
of performance and average power for arbitrary RTL
designs. Our approach uses an FPGA to simultaneously
simulate the performance of an RTL design and to
collect samples containing exact RTL state snapshots.
Each snapshot is then replayed in gate-level
simulation, resulting in a workload-specific average
power estimate with confidence intervals. For arbitrary
RTL and workloads, our methodology guarantees a minimum
of four-orders-of-magnitude speedup over commercial CAD
gate-level simulation tools and gives average energy
estimates guaranteed to be within 5\% of the true
average energy with 99\% confidence. We believe our
open-source sample-based energy simulation tool Strober
can not only rapidly provide ground truth for more
abstract power models, but can enable productive
design-space exploration early in the RTL design
process.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Laurenzano:2016:PIM,
author = "Michael A. Laurenzano and Yunqi Zhang and Jiang Chen
and Lingjia Tang and Jason Mars",
title = "{PowerChop}: identifying and managing non-critical
units in hybrid processor architectures",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "140--152",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001152",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "On-core microarchitectural structures consume
significant portions of a processor's power budget.
However, depending on application characteristics,
those structures do not always provide (much)
performance benefit. While timeout-based power gating
techniques have been leveraged for underutilized cores
and inactive functional units, these techniques have
not directly translated to high-activity units such as
vector processing units, complex branch predictors, and
caches. The performance benefit provided by these units
does not necessarily correspond with unit activity, but
instead is a function of application characteristics.
This work introduces PowerChop, a novel technique that
leverages the unique capabilities of HW/SW co-designed
hybrid processors to enact unit-level power management
at the application phase level. PowerChop adds two
small additional hardware units to facilitate phase
identification and triggering different power states,
enabling the software layer to cheaply track, predict
and take advantage of varying unit criticality across
application phases by powering gating units that are
not needed for performant execution. Through detailed
experimentation, we find that PowerChop significantly
decreases power consumption, reducing the leakage power
of a hybrid server processor by 9\% on average (up to
33\%) and a hybrid mobile processor by 19\% (up to
40\%) while introducing just 2\% slowdown.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Gu:2016:BFN,
author = "Boncheol Gu and Andre S. Yoon and Duck-Ho Bae and
Insoon Jo and Jinyoung Lee and Jonghyun Yoon and
Jeong-Uk Kang and Moonsang Kwon and Chanho Yoon and
Sangyeun Cho and Jaeheon Jeong and Duckhyun Chang",
title = "{Biscuit}: a framework for near-data processing of big
data workloads",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "153--165",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001154",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data-intensive queries are common in business
intelligence, data warehousing and analytics
applications. Typically, processing a query involves
full inspection of large in-storage data sets by CPUs.
An intuitive way to speed up such queries is to reduce
the volume of data transferred over the storage network
to a host system. This can be achieved by filtering out
extraneous data within the storage, motivating a form
of near-data processing. This work presents Biscuit, a
novel near-data processing framework designed for
modern solid-state drives. It allows programmers to
write a data-intensive application to run on the host
system and the storage system in a distributed, yet
seamless manner. In order to offer a high-level
programming model, Biscuit builds on the concept of
data flow. Data processing tasks communicate through
typed and data-ordered ports. Biscuit does not
distinguish tasks that run on the host system and the
storage system. As the result, Biscuit has desirable
traits like generality and expressiveness, while
promoting code reuse and naturally exposing
concurrency. We implement Biscuit on a host system that
runs the Linux OS and a high-performance solid-state
drive. We demonstrate the effectiveness of our approach
and implementation with experimental results. When data
filtering is done by hardware in the solid-state drive,
the average speed-up obtained for the top five queries
of TPC-H is over 15$ \times $.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Ozdal:2016:EEA,
author = "Muhammet Mustafa Ozdal and Serif Yesil and Taemin Kim
and Andrey Ayupov and John Greth and Steven Burns and
Ozcan Ozturk",
title = "Energy efficient architecture for graph analytics
accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "166--177",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001155",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Specialized hardware accelerators can significantly
improve the performance and power efficiency of compute
systems. In this paper, we focus on hardware
accelerators for graph analytics applications and
propose a configurable architecture template that is
specifically optimized for iterative vertex-centric
graph applications with irregular access patterns and
asymmetric convergence. The proposed architecture
addresses the limitations of the existing multi-core
CPU and GPU architectures for these types of
applications. The SystemC-based template we provide can
be customized easily for different vertex-centric
applications by inserting application-level data
structures and functions. After that, a cycle-accurate
simulator and RTL can be generated to model the target
hardware accelerators. In our experiments, we study
several graph-parallel applications, and show that the
hardware accelerators generated by our template can
outperform a 24 core high end server CPU system by up
to 3x in terms of performance. We also estimate the
area requirement and power consumption of these
hardware accelerators through physical-aware logic
synthesis, and show up to 65x better power consumption
with significantly smaller area.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Magaki:2016:ACS,
author = "Ikuo Magaki and Moein Khazraee and Luis Vega Gutierrez
and Michael Bedford Taylor",
title = "{ASIC} clouds: specializing the datacenter",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "178--190",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001156",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "GPU and FPGA-based clouds have already demonstrated
the promise of accelerating computing-intensive
workloads with greatly improved power and performance.
In this paper, we examine the design of ASIC Clouds,
which are purpose-built datacenters comprised of large
arrays of ASIC accelerators, whose purpose is to
optimize the total cost of ownership (TCO) of large,
high-volume chronic computations, which are becoming
increasingly common as more and more services are built
around the Cloud model. On the surface, the creation of
ASIC clouds may seem highly improbable due to high NREs
and the inflexibility of ASICs. Surprisingly, however,
large-scale ASIC Clouds have already been deployed by a
large number of commercial entities, to implement the
distributed Bitcoin cryptocurrency system. We begin
with a case study of Bitcoin mining ASIC Clouds, which
are perhaps the largest ASIC Clouds to date. From
there, we design three more ASIC Clouds, including a
YouTube-style video transcoding ASIC Cloud, a Litecoin
ASIC Cloud, and a Convolutional Neural Network ASIC
Cloud and show 2--3 orders of magnitude better TCO
versus CPU and GPU. Among our contributions, we present
a methodology that given an accelerator design, derives
Pareto-optimal ASIC Cloud Servers, by extracting data
from place-and-routed circuits and computational fluid
dynamic simulations, and then employing clever but
brute-force search to find the best jointly-optimized
ASIC, DRAM subsystem, motherboard, power delivery
system, cooling system, operating voltage, and case
design. Moreover, we show how data center parameters
determine which of the many Pareto-optimal points is
TCO-optimal. Finally we examine when it makes sense to
build an ASIC Cloud, and examine the impact of ASIC
NRE.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Oh:2016:AIC,
author = "Yunho Oh and Keunsoo Kim and Myung Kuk Yoon and Jong
Hyun Park and Yongjun Park and Won Woo Ro and Murali
Annavaram",
title = "{APRES}: improving cache efficiency by exploiting load
characteristics on {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "191--203",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001158",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Long memory latency and limited throughput become
performance bottlenecks of GPGPU applications. The
latency takes hundreds of cycles which is difficult to
be hidden by simply interleaving tens of warp
execution. While cache hierarchy helps to reduce memory
system pressure, massive Thread-Level Parallelism (TLP)
often causes excessive cache contention. This paper
proposes Adaptive PREfetching and Scheduling (APRES) to
improve GPU cache efficiency. APRES relies on the
following observations. First, certain static load
instructions tend to generate memory addresses having
very high locality. Second, although loads have no
locality, the access addresses still can show highly
strided access pattern. Third, the locality behavior
tends to be consistent regardless of warp ID. APRES
schedules warps so that as many cache hits generated as
possible before any cache misses generated. This is to
minimize cache thrashing when many warps are contending
for a cache line. However, to realize this operation,
it is required to predict which warp will hit the cache
in the near future. Without directly predicting future
cache hit/miss for each warp, APRES creates a group of
warps that will execute the same load instruction in
the near future. Based on the third observation, we
expect the locality behavior is consistent over all
warps in the group. If the first executed warp in the
group hits the cache, then the load is considered as a
high locality type, and APRES prioritizes all warps in
the group. Group prioritization leads to consecutive
cache hits, because the grouped warps are likely to
access the same cache line. If the first warp missed
the cache, then the load is considered as a strided
type, and APRES generates prefetch requests for the
other warps in the group. After that, APRES prioritizes
prefetch targeted warps so that the demand requests are
merged to Miss Status Holding Register (MSHR) or
prefetched lines can be accessed. On memory-intensive
applications, APRES achieves 31.7\% performance
improvement compared to the baseline GPU and 7.2\%
additional speedup compared to the best combination of
existing warp scheduling and prefetching methods.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Hsieh:2016:TOM,
author = "Kevin Hsieh and Eiman Ebrahimi and Gwangsun Kim and
Niladrish Chatterjee and Mike O'Connor and Nandita
Vijaykumar and Onur Mutlu and Stephen W. Keckler",
title = "Transparent offloading and mapping {(TOM)}: enabling
programmer-transparent near-data processing in {GPU}
systems",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "204--216",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001159",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Main memory bandwidth is a critical bottleneck for
modern GPU systems due to limited off-chip pin
bandwidth. 3D-stacked memory architectures provide a
promising opportunity to significantly alleviate this
bottleneck by directly connecting a logic layer to the
DRAM layers with high bandwidth connections. Recent
work has shown promising potential performance benefits
from an architecture that connects multiple such
3D-stacked memories and offloads bandwidth-intensive
computations to a GPU in each of the logic layers. An
unsolved key challenge in such a system is how to
enable computation offloading and data mapping to
multiple 3D-stacked memories without burdening the
programmer such that any application can transparently
benefit from near-data processing capabilities in the
logic layer. Our paper develops two new mechanisms to
address this key challenge. First, a compiler-based
technique that automatically identifies code to offload
to a logic-layer GPU based on a simple cost-benefit
analysis. Second, a software/hardware cooperative
mechanism that predicts which memory pages will be
accessed by offloaded code, and places those pages in
the memory stack closest to the offloaded code, to
minimize off-chip bandwidth consumption. We call the
combination of these two programmer-transparent
mechanisms TOM: Transparent Offloading and Mapping. Our
extensive evaluations across a variety of modern
memory-intensive GPU workloads show that, without
requiring any program modification, TOM significantly
improves performance (by 30\% on average, and up to
76\%) compared to a baseline GPU system that cannot
offload computation to 3D-stacked memories.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Park:2016:ESFb,
author = "Chang Hyun Park and Taekyung Heo and Jaehyuk Huh",
title = "Efficient synonym filtering and scalable delayed
translation for hybrid virtual caching",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "217--229",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001160",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Conventional translation look-aside buffers (TLBs) are
required to complete address translation with short
latencies, as the address translation is on the
critical path of all memory accesses even for L1 cache
hits. Such strict TLB latency restrictions limit the
TLB capacity, as the latency increase with large TLBs
may lower the overall performance even with potential
TLB miss reductions. Furthermore, TLBs consume a
significant amount of energy as they are accessed for
every instruction fetch and data access. To avoid the
latency restriction and reduce the energy consumption,
virtual caching techniques have been proposed to defer
translation to after L1 cache misses. However, an
efficient solution for the synonym problem has been a
critical issue hindering the wide adoption of virtual
caching. Based on the virtual caching concept, this
study proposes a hybrid virtual memory architecture
extending virtual caching to the entire cache
hierarchy, aiming to improve both performance and
energy consumption. The hybrid virtual caching uses
virtual addresses augmented with address space
identifiers (ASID) in the cache hierarchy for common
non-synonym addresses. For such non-synonyms, the
address translation occurs only after last-level cache
(LLC) misses. For uncommon synonym addresses, the
addresses are translated to physical addresses with
conventional TLBs before L1 cache accesses. To support
such hybrid translation, we propose an efficient
synonym detection mechanism based on Bloom filters
which can identify synonym candidates with few false
positives. For large memory applications, delayed
translation alone cannot solve the address translation
problem, as fixed-granularity delayed TLBs may not
scale with the increasing memory requirements. To
mitigate the translation scalability problem, this
study proposes a delayed many segment translation
designed for the hybrid virtual caching. The
experimental results show that our approach effectively
lowers accesses to the TLBs, leading to significant
power savings. In addition, the approach provides
performance improvement with scalable delayed
translation with variable length segments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Xu:2016:WSE,
author = "Qiumin Xu and Hyeran Jeon and Keunsoo Kim and Won Woo
Ro and Murali Annavaram",
title = "Warped-slicer: efficient intra-{SM} slicing through
dynamic resource partitioning for {GPU}
multiprogramming",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "230--242",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001161",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As technology scales, GPUs are forecasted to
incorporate an ever-increasing amount of computing
resources to support thread-level parallelism. But even
with the best effort, exposing massive thread-level
parallelism from a single GPU kernel, particularly from
general purpose applications, is going to be a
difficult challenge. In some cases, even if there is
sufficient thread-level parallelism in a kernel, there
may not be enough available memory bandwidth to support
such massive concurrent thread execution. Hence, GPU
resources may be underutilized as more general purpose
applications are ported to execute on GPUs. In this
paper, we explore multiprogramming GPUs as a way to
resolve the resource underutilization issue. There is a
growing hardware support for multiprogramming on GPUs.
Hyper-Q has been introduced in the Kepler architecture
which enables multiple kernels to be invoked via tens
of hardware queue streams. Spatial multitasking has
been proposed to partition GPU resources across
multiple kernels. But the partitioning is done at the
coarse granularity of streaming multiprocessors (SMs)
where each kernel is assigned to a subset of SMs. In
this paper, we advocate for partitioning a single SM
across multiple kernels, which we term as intra-SM
slicing. We explore various intra-SM slicing strategies
that slice resources within each SM to concurrently run
multiple kernels on the SM. Our results show that there
is not one intra-SM slicing strategy that derives the
best performance for all application pairs. We propose
Warped-Slicer, a dynamic intra-SM slicing strategy that
uses an analytical method for calculating the SM
resource partitioning across different kernels that
maximizes performance. The model relies on a set of
short online profile runs to determine how each
kernel's performance varies as more thread blocks from
each kernel are assigned to an SM. The model takes into
account the interference effect of shared resource
usage across multiple kernels. The model is also
computationally efficient and can determine the
resource partitioning quickly to enable dynamic
decision making as new kernels enter the system. We
demonstrate that the proposed Warped-Slicer approach
improves performance by 23\% over the baseline
multiprogramming approach with minimal hardware
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Han:2016:EEI,
author = "Song Han and Xingyu Liu and Huizi Mao and Jing Pu and
Ardavan Pedram and Mark A. Horowitz and William J.
Dally",
title = "{EIE}: efficient inference engine on compressed deep
neural network",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "243--254",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001163",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "State-of-the-art deep neural networks (DNNs) have
hundreds of millions of connections and are both
computationally and memory intensive, making them
difficult to deploy on embedded systems with limited
hardware resources and power budgets. While custom
hardware helps the computation, fetching weights from
DRAM is two orders of magnitude more expensive than ALU
operations, and dominates the required power.
Previously proposed `Deep Compression' makes it
possible to fit large DNNs (AlexNet and VGGNet) fully
in on-chip SRAM. This compression is achieved by
pruning the redundant connections and having multiple
connections share the same weight. We propose an energy
efficient inference engine (EIE) that performs
inference on this compressed network model and
accelerates the resulting sparse matrix-vector
multiplication with weight sharing. Going from DRAM to
SRAM gives EIE 120$ \times $ energy saving; Exploiting
sparsity saves 10$ \times $; Weight sharing gives 8$
\times $; Skipping zero activations from ReLU saves
another 3$ \times $. Evaluated on nine DNN benchmarks,
EIE is 189$ \times $ and 13$ \times $ faster when
compared to CPU and GPU implementations of the same DNN
without compression. EIE has a processing power of 102
GOPS working directly on a compressed network,
corresponding to 3 TOPS on an uncompressed network, and
processes FC layers of AlexNet at $ 1.88 \times 10^4 $
frames/sec with a power dissipation of only 600mW. It
is 24,000$ \times $ and 3,400$ \times $ more energy
efficient than a CPU and GPU respectively. Compared
with DaDianNao, EIE has 2.9$ \times $, 19$ \times $ and
3$ \times $ better throughput, energy efficiency and
area efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{LiKamWa:2016:RAC,
author = "Robert LiKamWa and Yunhui Hou and Julian Gao and Mia
Polansky and Lin Zhong",
title = "{RedEye}: analog {ConvNet} image sensor architecture
for continuous mobile vision",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "255--266",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001164",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Continuous mobile vision is limited by the inability
to efficiently capture image frames and process vision
features. This is largely due to the energy burden of
analog readout circuitry, data traffic, and intensive
computation. To promote efficiency, we shift early
vision processing into the analog domain. This results
in RedEye, an analog convolutional image sensor that
performs layers of a convolutional neural network in
the analog domain before quantization. We design RedEye
to mitigate analog design complexity, using a modular
column-parallel design to promote physical design reuse
and algorithmic cyclic reuse. RedEye uses programmable
mechanisms to admit noise for tunable energy reduction.
Compared to conventional systems, RedEye reports an
85\% reduction in sensor energy, 73\% reduction in
cloudlet-based system energy, and a 45\% reduction in
computation-based system energy.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Reagen:2016:MEL,
author = "Brandon Reagen and Paul Whatmough and Robert Adolf and
Saketh Rama and Hyunkwang Lee and Sae Kyu Lee and
Jos{\'e} Miguel Hern{\'a}ndez-Lobato and Gu-Yeon Wei
and David Brooks",
title = "{Minerva}: enabling low-power, highly-accurate deep
neural network accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "267--278",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001165",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The continued success of Deep Neural Networks (DNNs)
in classification tasks has sparked a trend of
accelerating their execution with specialized hardware.
While published designs easily give an order of
magnitude improvement over general-purpose hardware,
few look beyond an initial implementation. This paper
presents Minerva, a highly automated co-design approach
across the algorithm, architecture, and circuit levels
to optimize DNN hardware accelerators. Compared to an
established fixed-point accelerator baseline, we show
that fine-grained, heterogeneous datatype optimization
reduces power by 1.5$ \times $; aggressive, inline
predication and pruning of small activity values
further reduces power by 2.0$ \times $; and active
hardware fault detection coupled with domain-aware
error mitigation eliminates an additional 2.7$ \times $
through lowering SRAM voltages. Across five datasets,
these optimizations provide a collective average of
8.1$ \times $ power reduction over an accelerator
baseline without compromising DNN model accuracy.
Minerva enables highly accurate, ultra-low power DNN
accelerators (in the range of tens of milliwatts),
making it feasible to deploy DNNs in power-constrained
IoT and mobile devices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Yao:2016:OCO,
author = "Yuan Yao and Zhonghai Lu",
title = "Opportunistic competition overhead reduction for
expediting critical section in {NoC} based {CMPs}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "279--290",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001167",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the degree of parallelism increasing, performance
of multi-threaded shared variable applications is not
only limited by serialized critical section execution,
but also by the serialized competition overhead for
threads to get access to critical section. As the
number of concurrent threads grows, such competition
overhead may exceed the time spent in critical section
itself, and become the dominating factor limiting the
performance of parallel applications. In modern
operating systems, queue spinlock, which comprises a
low-overhead spinning phase and a high-overhead
sleeping phase, is often used to lock critical
sections. In the paper, we show that this advanced
locking solution may create very high competition
overhead for multithreaded applications executing in
NoC-based CMPs. Then we propose a software-hardware
cooperative mechanism that can opportunistically
maximize the chance that a thread wins the critical
section access in the low-overhead spinning phase,
thereby reducing the competition overhead. At the OS
primitives level, we monitor the remaining times of
retry (RTR) in a thread's spinning phase, which
reflects in how long the thread must enter into the
high-overhead sleep mode. At the hardware level, we
integrate the RTR information into the packets of
locking requests, and let the NoC prioritize locking
request packets according to the RTR information. The
principle is that the smaller RTR a locking request
packet carries, the higher priority it gets and thus
quicker delivery. We evaluate our opportunistic
competition overhead reduction technique with
cycle-accurate full-system simulations in GEM5 using
PARSEC (11 programs) and SPEC OMP2012 (14 programs)
benchmarks. Compared to the original queue spinlock
implementation, experimental results show that our
method can effectively increase the opportunity of
threads entering the critical section in low-overhead
spinning phase, reducing the competition overhead
averagely by 39.9\% (maximally by 61.8\%) and
accelerating the execution of the Region-of-Interest
averagely by 14.4\% (maximally by 24.5\%) across all 25
benchmark programs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:SCD,
author = "Channoh Kim and Sungmin Kim and Hyeon Gyu Cho and
Dooyoung Kim and Jaehyeok Kim and Young H. Oh and
Hakbeom Jang and Jae W. Lee",
title = "Short-circuit dispatch: accelerating virtual machine
interpreters on embedded processors",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "291--303",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001168",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Interpreters are widely used to implement high-level
language virtual machines (VMs), especially on
resource-constrained embedded platforms. Many scripting
languages employ interpreter-based VMs for their
advantages over native code compilers, such as
portability, smaller resource footprint, and compact
codes. For efficient interpretation a script (program)
is first compiled into an intermediate representation,
or bytecodes. The canonical interpreter then runs an
infinite loop that fetches, decodes, and executes one
bytecode at a time. This bytecode dispatch loop is a
well-known source of inefficiency, typically featuring
a large jump table with a hard-to-predict indirect
jump. Most existing techniques to optimize this loop
focus on reducing the misprediction rate of this
indirect jump in both hardware and software. However,
these techniques are much less effective on embedded
processors with shallow pipelines and low IPCs.
Instead, we tackle another source of inefficiency more
prominent on embedded platforms---redundant computation
in the dispatch loop. To this end, we propose
Short-Circuit Dispatch (SCD), a low-cost architectural
extension that enables fast, hardware-based bytecode
dispatch with fewer instructions. The key idea of SCD
is to overlay the software-created bytecode jump table
on a branch target buffer (BTB). Once a bytecode is
fetched, the BTB is looked up using the bytecode,
instead of PC, as key. If it hits, the interpreter
directly jumps to the target address retrieved from the
BTB; otherwise, it goes through the original dispatch
path. This effectively eliminates redundant computation
in the dispatcher code for decode, bound check, and
target address calculation, thus significantly reducing
total instruction count. Our simulation results
demonstrate that SCD achieves geomean speedups of
19.9\% and 14.1\% for two production-grade script
interpreters for Lua and JavaScript, respectively.
Moreover, our fully synthesizable RTL design based on a
RISC-V embedded processor shows that SCD improves the
EDP of the Lua interpreter by 24.2\%, while increasing
the chip area by only 0.72\% at a 40nm technology
node.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Dall:2016:AVP,
author = "Christoffer Dall and Shih-Wei Li and Jin Tack Lim and
Jason Nieh and Georgios Koloventzos",
title = "{ARM} virtualization: performance and architectural
implications",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "304--316",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001169",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "ARM servers are becoming increasingly common, making
server technologies such as virtualization for ARM of
growing importance. We present the first study of ARM
virtualization performance on server hardware,
including multicore measurements of two popular ARM and
x86 hypervisors, KVM and Xen. We show how ARM hardware
support for virtualization can enable much faster
transitions between VMs and the hypervisor, a key
hypervisor operation. However, current hypervisor
designs, including both Type 1 hypervisors such as Xen
and Type 2 hypervisors such as KVM, are not able to
leverage this performance benefit for real application
workloads. We discuss the reasons why and show that
other factors related to hypervisor software design and
implementation have a larger role in overall
performance. Based on our measurements, we discuss
changes to ARM's hardware virtualization support that
can potentially bridge the gap to bring its faster
VM-to-hypervisor transition mechanism to modern Type 2
hypervisors running real applications. These changes
have been incorporated into the latest ARM
architecture.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Gaur:2016:BVC,
author = "Jayesh Gaur and Alaa R. Alameldeen and Sreenivas
Subramoney",
title = "Base-victim compression: an opportunistic cache
compression architecture",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "317--328",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001171",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The memory wall has motivated many enhancements to
cache management policies aimed at reducing misses.
Cache compression has been proposed to increase
effective cache capacity, which potentially reduces
capacity and conflict misses. However, complexity in
cache compression implementations could increase cache
power and access latency. On the other hand, advanced
cache replacement mechanisms use heuristics to reduce
misses, leading to significant performance gains. Both
cache compression and replacement policies should
collaborate to improve performance. In this paper, we
demonstrate that cache compression and replacement
policies can interact negatively. In many workloads,
performance gains from replacement policies are lost
due to the need to alter the replacement policy to
accommodate compression. This leads to sub-optimal
replacement policies that could lose performance
compared to an uncompressed cache. We introduce a
novel, opportunistic cache compression mechanism,
Base-Victim, based on an efficient cache design. Our
compression architecture improves performance on top of
advanced cache replacement policies, and guarantees a
hit rate at least as high as that of an uncompressed
cache. For cache-sensitive applications, Base-Victim
achieves an average 7.3\% performance gain for
single-threaded workloads, and 8.7\% gain for
four-thread multi-program workload mixes.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:BPC,
author = "Jungrae Kim and Michael Sullivan and Esha Choukse and
Mattan Erez",
title = "Bit-plane compression: transforming data for better
compression in many-core architectures",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "329--340",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001172",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As key applications become more data-intensive and the
computational throughput of processors increases, the
amount of data to be transferred in modern memory
subsystems grows. Increasing physical bandwidth to keep
up with the demand growth is challenging, however, due
to strict area and energy limitations. This paper
presents a novel and lightweight compression algorithm,
Bit-Plane Compression (BPC), to increase the effective
memory bandwidth. BPC aims at homogeneously-typed
memory blocks, which are prevalent in many-core
architectures, and applies a smart data transformation
to both improve the inherent data compressibility and
to reduce the complexity of compression hardware. We
demonstrate that BPC provides superior compression
ratios of 4.1:1 for integer benchmarks and reduces
memory bandwidth requirements significantly.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Nair:2016:XEE,
author = "Prashant J. Nair and Vilas Sridharan and Moinuddin K.
Qureshi",
title = "{XED}: exposing on-die error detection information for
strong memory reliability",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "341--353",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001174",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Large-granularity memory failures continue to be a
critical impediment to system reliability. To make
matters worse, as DRAM scales to smaller nodes, the
frequency of unreliable bits in DRAM chips continues to
increase. To mitigate such scaling-related failures,
memory vendors are planning to equip existing DRAM
chips with On-Die ECC. For maintaining compatibility
with memory standards, On-Die ECC is kept invisible
from the memory controller. This paper explores how to
design high reliability memory systems in presence of
On-Die ECC. We show that if On-Die ECC is not exposed
to the memory system, having a 9-chip ECC-DIMM
(implementing SECDED) provides almost no reliability
benefits compared to an 8-chip non-ECC DIMM. We also
show that if the error detection of On-Die ECC can be
exposed to the memory controller, then Chipkill-level
reliability can be achieved even with a 9-chip
ECC-DIMM. To this end, we propose eXposed On-Die Error
Detection (XED), which exposes the On-Die error
detection information without requiring changes to the
memory standards or consuming bandwidth overheads. When
the On-Die ECC detects an error, XED transmits a
pre-defined ``catch-word'' instead of the corrected
data value. On receiving the catch-word, the memory
controller uses the parity stored in the 9-chip of the
ECC-DIMM to correct the faulty chip (similar to
RAID-3). Our studies show that XED provides
Chipkill-level reliability (172x higher than SECDED),
while incurring negligible overheads, with a 21\% lower
execution time than Chipkill. We also show that XED can
enable Chipkill systems to provide Double-Chipkill
level reliability while avoiding the associated
storage, performance, and power overheads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{ulAlam:2016:PRS,
author = "Mohammad Mejbah ul Alam and Abdullah Muzahid",
title = "Production-run software failure diagnosis via
\underline{a}daptive \underline{c}ommunication
\underline{t}racking",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "354--366",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001175",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software failure diagnosis techniques work either by
sampling some events at production-run time or by using
some bug detection algorithms. Some of the techniques
require the failure to be reproduced multiple times.
The ones that do not require such, are not adaptive
enough when the execution platform, environment or code
changes. We propose ACT, a diagnosis technique for
production-run failures, that uses the machine
intelligence of neural hardware. ACT learns some
invariants (e.g., data communication invariants)
on-the-fly using the neural hardware and records any
potential violation of them. Since ACT can learn
invariants on-the-fly, it can adapt to any change in
execution setting or code. Since it records only the
potentially violated invariants, the postprocessing
phase can pinpoint the root cause fairly accurately
without requiring to observe the failure again. ACT
works seamlessly for many sequential and concurrency
bugs. The paper provides a detailed design and
implementation of ACT in a typical multiprocessor
system. It uses a three stage pipeline for partially
configurable one hidden layer neural networks. We have
evaluated ACT on a variety of programs from popular
benchmarks as well as open source programs. ACT
diagnoses failures caused by 16 bugs from these
programs with accurate ranking. Compared to existing
learning and sampling based approaches, ACT has better
diagnostic ability. For the default configuration, ACT
has an average execution overhead of 8.2\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Chen:2016:ESA,
author = "Yu-Hsin Chen and Joel Emer and Vivienne Sze",
title = "{Eyeriss}: a spatial architecture for energy-efficient
dataflow for convolutional neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "367--379",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001177",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Deep convolutional neural networks (CNNs) are widely
used in modern AI systems for their superior accuracy
but at the cost of high computational complexity. The
complexity comes from the need to simultaneously
process hundreds of filters and channels in the
high-dimensional convolutions, which involve a
significant amount of data movement. Although
highly-parallel compute paradigms, such as SIMD/SIMT,
effectively address the computation requirement to
achieve high throughput, energy consumption still
remains high as data movement can be more expensive
than computation. Accordingly, finding a dataflow that
supports parallel processing with minimal data movement
cost is crucial to achieving energy-efficient CNN
processing without compromising accuracy. In this
paper, we present a novel dataflow, called
row-stationary (RS), that minimizes data movement
energy consumption on a spatial architecture. This is
realized by exploiting local data reuse of filter
weights and feature map pixels, i.e., activations, in
the high-dimensional convolutions, and minimizing data
movement of partial sum accumulations. Unlike dataflows
used in existing designs, which only reduce certain
types of data movement, the proposed RS dataflow can
adapt to different CNN shape configurations and reduces
all types of data movement through maximally utilizing
the processing engine (PE) local storage, direct
inter-PE communication and spatial parallelism. To
evaluate the energy efficiency of the different
dataflows, we propose an analysis framework that
compares energy cost under the same hardware area and
processing parallelism constraints. Experiments using
the CNN configurations of AlexNet show that the
proposed RS dataflow is more energy efficient than
existing dataflows in both convolutional (1.4$ \times $
to 2.5$ \times $) and fully-connected layers (at least
1.3$ \times $ for batch size larger than 16). The RS
dataflow has also been demonstrated on a fabricated
chip, which verifies our energy analysis.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:NPD,
author = "Duckhwan Kim and Jaeha Kung and Sek Chai and Sudhakar
Yalamanchili and Saibal Mukhopadhyay",
title = "{Neurocube}: a programmable digital neuromorphic
architecture with high-density {$3$D} memory",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "380--392",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001178",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents a programmable and scalable
digital neuromorphic architecture based on 3D
high-density memory integrated with logic tier for
efficient neural computing. The proposed architecture
consists of clusters of processing engines, connected
by 2D mesh network as a processing tier, which is
integrated in 3D with multiple tiers of DRAM. The PE
clusters access multiple memory channels (vaults) in
parallel. The operating principle, referred to as the
memory centric computing, embeds specialized
state-machines within the vault controllers of HMC to
drive data into the PE clusters. The paper presents the
basic architecture of the Neurocube and an analysis of
the logic tier synthesized in 28nm and 15nm process
technologies. The performance of the Neurocube is
evaluated and illustrated through the mapping of a
Convolutional Neural Network and estimating the
subsequent power and performance for both training and
inference.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Liu:2016:CIS,
author = "Shaoli Liu and Zidong Du and Jinhua Tao and Dong Han
and Tao Luo and Yuan Xie and Yunji Chen and Tianshi
Chen",
title = "{Cambricon}: an instruction set architecture for
neural networks",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "393--405",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001179",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Neural Networks (NN) are a family of models for a
broad range of emerging machine learning and pattern
recognition applications. NN techniques are
conventionally executed on general-purpose processors
(such as CPU and GPGPU), which are usually not
energy-efficient since they invest excessive hardware
resources to flexibly support various workloads.
Consequently, application-specific hardware
accelerators for neural networks have been proposed
recently to improve the energy-efficiency. However,
such accelerators were designed for a small set of NN
techniques sharing similar computational patterns, and
they adopt complex and informative instructions
(control signals) directly corresponding to high-level
functional blocks of an NN (such as layers), or even an
NN as a whole. Although straightforward and
easy-to-implement for a limited set of similar NN
techniques, the lack of agility in the instruction set
prevents such accelerator designs from supporting a
variety of different NN techniques with sufficient
flexibility and efficiency. In this paper, we propose a
novel domain-specific Instruction Set Architecture
(ISA) for NN accelerators, called Cambricon, which is a
load-store architecture that integrates scalar, vector,
matrix, logical, data transfer, and control
instructions, based on a comprehensive analysis of
existing NN techniques. Our evaluation over a total of
ten representative yet distinct NN techniques have
demonstrated that Cambricon exhibits strong descriptive
capacity over a broad range of NN techniques, and
provides higher code density than general-purpose ISAs
such as x86, MIPS, and GPGPU. Compared to the
latest state-of-the-art NN accelerator design DaDianNao
[5] (which can only accommodate 3 types of NN
techniques), our Cambricon-based accelerator prototype
implemented in TSMC 65nm technology incurs only
negligible latency/power/area overheads, with a
versatile coverage of 10 different NN benchmarks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Huang:2016:DLN,
author = "Ziqiang Huang and Andrew D. Hilton and Benjamin C.
Lee",
title = "Decoupling loads for nano-instruction set computers",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "406--417",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001181",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose an ISA extension that decouples the data
access and register write operations in a load
instruction. We describe system and hardware support
for decoupled loads. Furthermore, we show how compilers
can generate better static instruction schedules by
hoisting a decoupled load's data access above may-alias
stores and branches. We find that decoupled loads
improve performance with geometric mean speedups of
8.4\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Hayes:2016:FVM,
author = "Timothy Hayes and Oscar Palomar and Osman Unsal and
Adrian Cristal and Mateo Valero",
title = "Future vector microprocessor extensions for data
aggregations",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "418--430",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001182",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As the rate of annual data generation grows
exponentially, there is a demand to aggregate and
summarise vast amounts of information quickly. In the
past, frequency scaling was relied upon to push
application throughput. Today, Dennard scaling has
ceased and further performance must come from
exploiting parallelism. Single instruction-multiple
data (SIMD) instruction sets offer a highly efficient
and scalable way of exploiting data-level parallelism
(DLP). While microprocessors originally offered very
simple SIMD support targeted at multimedia
applications, these extensions have been growing both
in width and functionality. Observing this trend, we
use a simulation framework to model future SIMD support
and then propose and evaluate five different ways of
vectorising data aggregation. We find that although
data aggregation is abundant in DLP, it is often too
irregular to be expressed efficiently using typical
SIMD instructions. Based on this observation, we
propose a set of novel algorithms and SIMD instructions
to better capture this irregular DLP. Furthermore, we
discover that the best algorithm is highly dependent on
the characteristics of the input. Our proposed solution
can dynamically choose the optimal algorithm in the
majority of cases and achieves speedups between 2.7 $
\times $ and 7.6 $ \times $ over a scalar baseline.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Sleiman:2016:ESO,
author = "Faissal M. Sleiman and Thomas F. Wenisch",
title = "Efficiently scaling out-of-order cores for
simultaneous multithreading",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "431--443",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001183",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Simultaneous multithreading (SMT) out-of-order cores
waste a significant portion of structural out-of-order
core resources on instructions that do not need them.
These resources eliminate false ordering dependences.
However, because thread interleaving spreads dependent
instructions, nearly half of instructions dynamically
issue in program order after all false dependences have
resolved. These in-sequence instructions interleave
with other reordered instructions at a fine granularity
within the instruction window. We develop a technique
to efficiently scale in-flight instructions through a
hybrid out-of-order/in-order microarchitecture, which
can dispatch instructions to efficient in-order
scheduling mechanisms---using a FIFO issue queue called
the shelf---on an instruction-by-instruction basis.
Instructions dispatched to the shelf do not allocate
out-of-order core resources in the reorder buffer,
issue queue, physical registers, or load-store queues.
We measure opportunity for such hybrid
microarchitectures and design and evaluate a practical
dispatch mechanism targeted at 4-threaded cores. Adding
a shelf to a baseline 4-thread system with 64-entry ROB
improves normalized system throughput by 11.5\% (up to
19.2\% at best) and energy-delay product by 10.9\% (up
to 17.5\% at best).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Hashemi:2016:ADC,
author = "Milad Hashemi and Khubaib and Eiman Ebrahimi and Onur
Mutlu and Yale N. Patt",
title = "Accelerating dependent cache misses with an enhanced
memory controller",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "444--455",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001184",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "On-chip contention increases memory access latency for
multicore processors. We identify that this additional
latency has a substantial effect on performance for an
important class of latency-critical memory operations:
those that result in a cache miss and are dependent on
data from a prior cache miss. We observe that the
number of instructions between the first cache miss and
its dependent cache miss is usually small. To minimize
dependent cache miss latency, we propose adding just
enough functionality to dynamically identify these
instructions at the core and migrate them to the memory
controller for execution as soon as source data arrives
from DRAM. This migration allows memory requests issued
by our new Enhanced Memory Controller (EMC) to
experience a 20\% lower latency than if issued by the
core. On a set of memory intensive quad-core workloads,
the EMC results in a 13\% improvement in system
performance and a 5\% reduction in energy consumption
over a system with a Global History Buffer prefetcher,
the highest performing prefetcher in our evaluation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Zhang:2016:TAS,
author = "Yunqi Zhang and David Meisner and Jason Mars and
Lingjia Tang",
title = "{Treadmill}: attributing the source of tail latency
through precise load testing and statistical
inference",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "456--468",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001186",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Managing tail latency of requests has become one of
the primary challenges for large-scale Internet
services. Data centers are quickly evolving and service
operators frequently desire to make changes to the
deployed software and production hardware
configurations. Such changes demand a confident
understanding of the impact on one's service, in
particular its effect on tail latency (e.g., 95th- or
99th-percentile response latency of the service).
Evaluating the impact on the tail is challenging
because of its inherent variability. Existing tools and
methodologies for measuring these effects suffer from a
number of deficiencies including poor load tester
design, statistically inaccurate aggregation, and
improper attribution of effects. As shown in the paper,
these pitfalls can often result in misleading
conclusions. In this paper, we develop a methodology
for statistically rigorous performance evaluation and
performance factor attribution for server workloads.
First, we find that careful design of the server load
tester can ensure high quality performance evaluation,
and empirically demonstrate the inaccuracy of load
testers in previous work. Learning from the design
flaws in prior work, we design and develop a modular
load tester platform, Treadmill, that overcomes
pitfalls of existing tools. Next, utilizing Treadmill,
we construct measurement and analysis procedures that
can properly attribute performance factors. We rely on
statistically-sound performance evaluation and quantile
regression, extending it to accommodate the
idiosyncrasies of server systems. Finally, we use our
augmented methodology to evaluate the impact of common
server hardware features with Facebook production
workloads on production hardware. We decompose the
effects of these features on request tail latency and
demonstrate that our evaluation methodology provides
superior results, particularly in capturing complicated
and counter-intuitive performance behaviors. By tuning
the hardware features as suggested by the attribution,
we reduce the 99th-percentile latency by 43\% and its
variance by 93\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Wu:2016:DFD,
author = "Qiang Wu and Qingyuan Deng and Lakshmi Ganesh and
Chang-Hong Hsu and Yun Jin and Sanjeev Kumar and Bin Li
and Justin Meza and Yee Jiun Song",
title = "{Dynamo}: {Facebook}'s data center-wide power management
system",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "469--480",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001187",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Data center power is a scarce resource that often goes
underutilized due to conservative planning. This is
because the penalty for overloading the data center
power delivery hierarchy and tripping a circuit breaker
is very high, potentially causing long service outages.
Recently, dynamic server power capping, which limits
the amount of power consumed by a server, has been
proposed and studied as a way to reduce this penalty,
enabling more aggressive utilization of provisioned
data center power. However, no real at-scale solution
for data center-wide power monitoring and control has
been presented in the literature. In this paper, we
describe Dynamo --- a data center-wide power management
system that monitors the entire power hierarchy and
makes coordinated control decisions to safely and
efficiently use provisioned data center power. Dynamo
has been developed and deployed across all of
Facebook's data centers for the past three years. Our
key insight is that in real-world data centers,
different power and performance constraints at
different levels in the power hierarchy necessitate
coordinated data center-wide power management. We make
three main contributions. First, to understand the
design space of Dynamo, we provide a characterization
of power variation in data centers running a diverse
set of modern workloads. This characterization uses
fine-grained power samples from tens of thousands of
servers and spanning a period of over six months.
Second, we present the detailed design of Dynamo. Our
design addresses several key issues not addressed by
previous simulation-based studies. Third, the proposed
techniques and design have been deployed and evaluated
in large scale data centers serving billions of users.
We present production results showing that Dynamo has
prevented 18 potential power outages in the past 6
months due to unexpected power surges; that Dynamo
enables optimizations leading to a 13\% performance
boost for a production Hadoop cluster and a nearly 40\%
performance increase for a search cluster; and that
Dynamo has already enabled an 8\% increase in the power
capacity utilization of one of our data centers with
more aggressive power subscription measures underway.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Wong:2016:PEA,
author = "Daniel Wong",
title = "Peak efficiency aware scheduling for highly energy
proportional servers",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "481--492",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001188",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Energy proportionality of data center servers have
improved drastically over the past decade to the point
where near ideal energy proportional servers are now
common. These highly energy proportional servers
exhibit the unique property where peak efficiency no
longer coincides with peak utilization. In this paper,
we explore the implications of this property on data
center scheduling. We identified that current state of
the art data center schedulers does not efficiently
leverage these properties, leading to inefficient
scheduling decisions. We propose Peak Efficiency Aware
Scheduling (PEAS) which can achieve better-than-ideal
energy proportionality at the data center level. We
demonstrate that PEAS can reduce average power by
25.5\% with 3.0\% improvement to TCO compared to
state-of-the-art scheduling policies.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Li:2016:PAD,
author = "Chao Li and Zhenhua Wang and Xiaofeng Hou and Haopeng
Chen and Xiaoyao Liang and Minyi Guo",
title = "Power attack defense: securing battery-backed data
centers",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "493--505",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001189",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Battery systems are crucial components for
mission-critical data centers. Without secure energy
backup, existing under-provisioned data centers are
largely unguarded targets for cyber criminals.
Particularly for today's scale-out servers, power
oversubscription unavoidably taxes a data center's
backup energy resources, leaving very little room for
dealing with emergency. Besides, the emerging trend
towards deploying distributed energy storage
architecture causes the associated energy backup of
each rack to shrink, making servers vulnerable to power
anomalies. As a result, an attacker can generate power
peaks to easily crash or disrupt a power-constrained
system. This study aims at securing data centers from
malicious loads that seek to drain their precious
energy storage and overload server racks without prior
detection. We term such load as Power Virus (PV) and
demonstrate its basic two-phase attacking model and
characterize its behaviors on real systems. The PV can
learn the victim rack's battery characteristics by
disguising as benign loads. Once gaining enough
information, the PV can be mutated to generate hidden
power spikes that have a high chance to overload the
system. To defend against PV, we propose power attack
defense (PAD), a novel energy management patch built on
lightweight software and hardware mechanisms. PAD not
only increases the attacking cost considerably by
hiding vulnerable racks from visible spikes, it also
strengthens the last line of defense against hidden
spikes. Using Google cluster traces we show that PAD
can effectively raise the bar of a successful power
attack: compared to prior arts, it increases the data
center survival time by 1.6$ \sim $11X and provides better
performance guarantee. It enables modern data centers
to safely exploit the benefits that power
oversubscription may provide, with the slightest cost
overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Gao:2016:DLP,
author = "Mingyu Gao and Christina Delimitrou and Dimin Niu and
Krishna T. Malladi and Hongzhong Zheng and Bob Brennan
and Christos Kozyrakis",
title = "{DRAF}: a low-power {DRAM}-based reconfigurable
acceleration fabric",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "506--518",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001191",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "FPGAs are a popular target for application-specific
accelerators because they lead to a good balance
between flexibility and energy efficiency. However,
FPGA lookup tables introduce significant area and power
overheads, making it difficult to use FPGA devices in
environments with tight cost and power constraints.
This is the case for datacenter servers, where a
modestly-sized FPGA cannot accommodate the large number
of diverse accelerators that datacenter applications
need. This paper introduces DRAF, an architecture for
bit-level reconfigurable logic that uses DRAM subarrays
to implement dense lookup tables. DRAF overlaps DRAM
operations like bitline precharge and charge
restoration with routing within the reconfigurable
routing fabric to minimize the impact of DRAM latency.
It also supports multiple configuration contexts that
can be used to quickly switch between different
accelerators with minimal latency. Overall, DRAF trades
off some of the performance of FPGAs for significant
gains in area and power. DRAF improves area density by
10x over FPGAs and power consumption by more than 3x,
enabling DRAF to satisfy demanding applications within
strict power and cost constraints. While accelerators
mapped to DRAF are 2--3x slower than those in FPGAs,
they still deliver a 13x speedup and an 11x reduction
in power consumption over a Xeon core for a wide range
of datacenter tasks, including analytics and
interactive services like speech recognition.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Zhang:2016:MWE,
author = "Lunkai Zhang and Brian Neely and Diana Franklin and
Dmitri Strukov and Yuan Xie and Frederic T. Chong",
title = "{Mellow Writes}: extending lifetime in resistive
memories through selective slow write backs",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "519--531",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001192",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging resistive memory technologies, such as PCRAM
and ReRAM, have been proposed as promising replacements
for DRAM-based main memory, due to their better
scalability, low standby power, and non-volatility.
However, limited write endurance is a major drawback
for such resistive memory technologies. Wear leveling
(balancing the distribution of writes) and wear
limiting (reducing the number of writes) have been
proposed to mitigate this disadvantage, but both
techniques only manage a fixed budget of writes to a
memory system rather than increase the number
available. In this paper, we propose a new type of wear
limiting technique, Mellow Writes, which reduces the
wearout of individual writes rather than reducing the
number of writes. Mellow Writes is based on the fact
that slow writes performed with lower dissipated power
can lead to longer endurance (and therefore longer
lifetimes). For non-volatile memories, an N$^1$ to
N$^3$ times endurance can be achieved if the write
operation is slowed down by N times. We present three
microarchitectural mechanisms (Bank-Aware Mellow
Writes, Eager Mellow Writes, and Wear Quota) that
selectively perform slow writes to increase memory
lifetime while minimizing performance impact. Assuming
a factor N$^2$ advantage in cell endurance for a factor
N slower write, our best Mellow Writes mechanism can
achieve 2.58$ \times $ lifetime and 1.06$ \times $
performance of the baseline system. In addition, its
performance is almost the same as a system aggressively
optimized for performance (at the expense of
endurance). Finally, Wear Quota guarantees a minimal
lifetime (e.g., 8 years) by forcing more slow writes in
presence of heavy workloads. We also perform
sensitivity analysis on the endurance advantage factor
for slow writes, from N$^1$ to N$^3$, and find that our
technique is still useful for factors as low as
N$^1$.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Zhou:2016:MMI,
author = "Yanqi Zhou and David Wentzlaff",
title = "{MITTS}: memory inter-arrival time traffic shaping",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "532--544",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001193",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory bandwidth severely limits the scalability and
performance of multicore and manycore systems.
Application performance can be very sensitive to both
the delivered memory bandwidth and latency. In
multicore systems, a memory channel is usually shared
by multiple cores. Having the ability to precisely
provision, schedule, and isolate memory bandwidth and
latency on a per-core basis is particularly important
when different memory guarantees are needed on a
per-customer, per-application, or per-core basis.
Infrastructure as a Service (IaaS) Cloud systems, and
even general purpose multicores optimized for
application throughput or fairness all benefit from the
ability to control and schedule memory access on a
fine-grain basis. In this paper, we propose MITTS
(Memory Inter-arrival Time Traffic Shaping), a simple,
distributed hardware mechanism which limits memory
traffic at the source (Core or LLC). MITTS shapes
memory traffic based on memory request inter-arrival
time, enabling fine-grain bandwidth allocation. In an
IaaS system, MITTS enables Cloud customers to express
their memory distribution needs and pay commensurately.
For instance, MITTS enables charging customers that
have bursty memory traffic more than customers with
uniform memory traffic for the same aggregate
bandwidth. Beyond IaaS systems, MITTS can also be used
to optimize for throughput or fairness in a general
purpose multi-program workload. MITTS uses an online
genetic algorithm to configure hardware bins, which can
adapt for program phases and variable input sets. We
have implemented MITTS in Verilog and have taped-out
the design in a 25-core 32nm processor and find that
MITTS requires less than 0.9\% of core area. We
evaluate across SPECint, PARSEC, Apache, and bhm Mail
Server workloads, and find that MITTS achieves an
average 1.18$ \times $ performance gain compared to the
best static bandwidth allocation, a 2.69$ \times $
average performance/cost advantage in an IaaS setting,
and up to 1.17$ \times $ better throughput and 1.52$
\times $ better fairness when compared to conventional
memory bandwidth provisioning techniques.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{SanMiguel:2016:AA,
author = "Joshua {San Miguel} and Natalie {Enright Jerger}",
title = "The anytime automaton",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "545--557",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001195",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Approximate computing is an emerging paradigm enabling
tradeoffs between accuracy and efficiency. However, a
fundamental challenge persists: state-of-the-art
techniques lack the ability to enforce runtime
guarantees on accuracy. The convention is to (1) employ
offline or online accuracy models, or (2) present
experimental results that demonstrate empirically low
error. Unfortunately, these approaches are still unable
to guarantee acceptability of all application outputs
at runtime. We offer a solution that revisits concepts
from anytime algorithms. Originally explored for
real-time decision problems, anytime algorithms have
the property of producing results with increasing
accuracy over time. We propose the Anytime Automaton, a
new computation model that executes applications as a
parallel pipeline of anytime approximations. An
automaton produces approximate versions of the
application output with increasing accuracy,
guaranteeing that the final precise version is
eventually reached. The automaton can be stopped
whenever the output is deemed acceptable; otherwise, it
is a simple matter of letting it run longer. We present
an in-depth analysis of the model and demonstrate
attractive runtime-accuracy profiles on various
applications. Our anytime automaton is the first step
towards systems where the acceptability of an
application's output directly governs the amount of
time and energy expended.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Wang:2016:AMR,
author = "Siyang Wang and Xiangyu Zhang and Yuxuan Li and Ramin
Bashizade and Song Yang and Chris Dwyer and Alvin R.
Lebeck",
title = "Accelerating {Markov} random field inference using
molecular optical {Gibbs} sampling units",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "558--569",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001196",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The increasing use of probabilistic algorithms from
statistics and machine learning for data analytics
presents new challenges and opportunities for the
design of computing systems. One important class of
probabilistic machine learning algorithms is Markov
Chain Monte Carlo (MCMC) sampling, which can be used on
a wide variety of applications in Bayesian Inference.
However, this probabilistic iterative algorithm can be
inefficient in practice on today's processors,
especially for problems with high dimensionality and
complex structure. The source of inefficiency is
generating samples from parameterized probability
distributions. This paper seeks to address this
sampling inefficiency and presents a new approach to
support probabilistic computing that leverages the
native randomness of Resonance Energy Transfer (RET)
networks to construct RET-based sampling units (RSU).
Although RSUs can be designed for a variety of
applications, we focus on the specific class of
probabilistic problems described as Markov Random Field
Inference. Our proposed RSU uses a RET network to
implement a molecular-scale optical Gibbs sampling unit
(RSU-G) that can be integrated into a processor / GPU
as specialized functional units or organized as a
discrete accelerator. We experimentally demonstrate the
fundamental operation of an RSU using a macro-scale
hardware prototype. Emulation-based evaluation of two
computer vision applications for HD images reveal that
an RSU augmented GPU provides speedups over a GPU of 3
and 16. Analytic evaluation shows a discrete
accelerator that is limited by 336 GB/s DRAM produces
speedups of 21 and 54 versus the GPU implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Huang:2016:EAA,
author = "Yipeng Huang and Ning Guo and Mingoo Seok and Yannis
Tsividis and Simha Sethumadhavan",
title = "Evaluation of an analog accelerator for linear
algebra",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "570--582",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001197",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Due to the end of supply voltage scaling and the
increasing percentage of dark silicon in modern
integrated circuits, researchers are looking for new
scalable ways to get useful computation from existing
silicon technology. In this paper we present a
reconfigurable analog accelerator for solving systems
of linear equations. Commonly perceived downsides of
analog computing, such as low precision and accuracy,
limited problem sizes, and difficulty in programming
are all compensated for using methods we discuss. Based
on a prototyped analog accelerator chip we compare the
performance and energy consumption of the analog solver
against an efficient digital algorithm running on a
CPU, and find that the analog accelerator approach may
be an order of magnitude faster and provide one third
energy savings, depending on the accelerator design.
Due to the speed and efficiency of linear algebra
algorithms running on digital computers, an analog
accelerator that matches digital performance needs a
large silicon footprint. Finally, we conclude that
problem classes outside of systems of linear equations
may hold more promise for analog acceleration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Wang:2016:LLA,
author = "Jin Wang and Norm Rubin and Albert Sidelnik and
Sudhakar Yalamanchili",
title = "{LaPerm}: locality aware scheduler for dynamic
parallelism on {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "583--595",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001199",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent developments in GPU execution models and
architectures have introduced dynamic parallelism to
facilitate the execution of irregular applications
where control flow and memory behavior can be
unstructured, time-varying, and hierarchical. The
changes brought about by this extension to the
traditional bulk synchronous parallel (BSP) model also
creates new challenges in exploiting the current GPU
memory hierarchy. One of the major challenges is that
the reference locality that exists between the parent
and child thread blocks (TBs) created during dynamic
nested kernel and thread block launches cannot be fully
leveraged using the current TB scheduling strategies.
These strategies were designed for the current
implementations of the BSP model but fall short when
dynamic parallelism is introduced since they are
oblivious to the hierarchical reference locality. We
propose LaPerm, a new locality-aware TB scheduler that
exploits such parent-child locality, both spatial and
temporal. LaPerm adopts three different scheduling
decisions to (i) prioritize the execution of the child
TBs, (ii) bind them to the stream multiprocessors
(SMXs) occupied by their parents TBs, and (iii)
maintain workload balance across compute units.
Experiments with a set of irregular CUDA applications
executed on a cycle-level simulator employing dynamic
parallelism demonstrate that LaPerm is able to achieve
an average of 27\% performance improvement over the
baseline round-robin TB scheduler commonly used in
modern GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Shahar:2016:ACS,
author = "Sagi Shahar and Shai Bergman and Mark Silberstein",
title = "{ActivePointers}: a case for software address
translation on {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "596--608",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001200",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern discrete GPUs have been the processors of
choice for accelerating compute-intensive applications,
but using them in large-scale data processing is
extremely challenging. Unfortunately, they do not
provide important I/O abstractions long established in
the CPU context, such as memory mapped files, which
shield programmers from the complexity of buffer and
I/O device management. However, implementing these
abstractions on GPUs poses a problem: the limited GPU
virtual memory system provides no address space
management and page fault handling mechanisms to GPU
developers, and does not allow modifications to memory
mappings for running GPU programs. We implement
ActivePointers, a software address translation layer
and paging system that introduces native support for
page faults and virtual address space management to GPU
programs, and enables the implementation of fully
functional memory mapped files on commodity GPUs. Files
mapped into GPU memory are accessed using active
pointers, which behave like regular pointers but access
the GPU page cache under the hood, and trigger page
faults which are handled on the GPU. We design and
evaluate a number of novel mechanisms, including a
translation cache in hardware registers and translation
aggregation for deadlock-free page fault handling of
threads in a single warp. We extensively evaluate
ActivePointers on commodity NVIDIA GPUs using
microbenchmarks, and also implement a complex image
processing application that constructs a photo collage
from a subset of 10 million images stored in a 40GB
file. The GPU implementation maps the entire file into
GPU memory and accesses it via active pointers. The use
of active pointers adds only up to 1\% to the
application's runtime, while enabling speedups of up to
3.9$ \times $ over a combined CPU+GPU implementation
and 2.6$ \times $ over a 12-core CPU-only
implementation which uses AVX vector instructions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Yoon:2016:VTM,
author = "Myung Kuk Yoon and Keunsoo Kim and Sangpil Lee and Won
Woo Ro and Murali Annavaram",
title = "Virtual thread: maximizing thread-level parallelism
beyond {GPU} scheduling limit",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "609--621",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001201",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Modern GPUs require tens of thousands of concurrent
threads to fully utilize the massive amount of
processing resources. However, thread concurrency in
GPUs can be diminished either due to shortage of thread
scheduling structures (scheduling limit), such as
available program counters and single instruction
multiple thread stacks, or due to shortage of on-chip
memory (capacity limit), such as register file and
shared memory. Our evaluations show that in practice
concurrency in many general purpose applications
running on GPUs is curtailed by the scheduling limit
rather than the capacity limit. Maximizing the
utilization of on-chip memory resources without unduly
increasing the scheduling complexity is a key goal of
this paper. This paper proposes a Virtual Thread (VT)
architecture which assigns Cooperative Thread Arrays
(CTAs) up to the capacity limit, while ignoring the
scheduling limit. However, to reduce the logic
complexity of managing more threads concurrently, we
propose to place CTAs into active and inactive states,
such that the number of active CTAs still respects the
scheduling limit. When all the warps in an active CTA
hit a long latency stall, the active CTA is context
switched out and the next ready CTA takes its place. We
exploit the fact that both active and inactive CTAs
still fit within the capacity limit which obviates the
need to save and restore large amounts of CTA state.
Thus VT significantly reduces performance penalties of
CTA swapping. By swapping between active and inactive
states, VT can exploit higher degree of thread level
parallelism without increasing logic complexity. Our
simulation results show that VT improves performance by
23.9\% on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:AIE,
author = "Jungrae Kim and Michael Sullivan and Sangkug Lym and
Mattan Erez",
title = "All-inclusive {ECC}: thorough end-to-end protection
for reliable computer memory",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "622--633",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001203",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Increasing transfer rates and decreasing I/O voltage
levels make signals more vulnerable to transmission
errors. While the data in computer memory are
well-protected by modern error checking and correcting
(ECC) codes, the clock, control, command, and address
(CCCA) signals are weakly protected or even unprotected
such that transmission errors leave serious gaps in
data-only protection. This paper presents All-Inclusive
ECC (AIECC), a memory protection scheme that leverages
and augments data ECC to also thoroughly protect CCCA
signals. AIECC provides strong end-to-end protection of
memory, detecting nearly 100\% of CCCA errors and also
preventing transmission errors from causing latent
memory data corruption. AIECC provides these
system-level benefits without requiring extra storage
and transfer overheads and without degrading the
effective level of data protection.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Duwe:2016:RUF,
author = "Henry Duwe and Xun Jian and Daniel Petrisko and Rakesh
Kumar",
title = "Rescuing uncorrectable fault patterns in on-chip
memories through error pattern transformation",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "634--644",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001204",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Voltage scaling can effectively reduce processor
power, but also reduces the reliability of the SRAM
cells in on-chip memories. Therefore, it is often
accompanied by the use of an error correcting code
(ECC). To enable reliable and efficient memory
operation at low voltages, ECCs for on-chip memories
must provide both high error coverage and low
correction latency. In this paper, we propose error
pattern transformation, a novel low-latency error
correction technique that allows on-chip memories to be
scaled to voltages lower than what has been previously
possible. Our technique relies on the observation that
the number of on-chip memory errors that many ECCs can
correct differs widely depending on the error patterns
in the logical words they protect. We propose
adaptively rearranging the logical bit to physical bit
mapping per word according to the BIST-detectable fault
pattern in the physical word. The adaptive logical bit
to physical bit mapping transforms many uncorrectable
error patterns in the logical words into correctable
error patterns and, therefore, improving on-chip memory
reliability. This reduces the minimum voltage at which
on-chip memory can run by 70mV over the best
low-latency ECC baseline, leading to a 25.7\% core-wide
power reduction for an ARM Cortex-A7-like core. Energy
per instruction is reduced by 15.7\% compared to the
best baseline.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Kim:2016:RMR,
author = "Dong Wan Kim and Mattan Erez",
title = "{RelaxFault} memory repair",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "645--657",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001205",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory system reliability is a serious concern in many
systems today, and is becoming more worrisome as
technology scales and system size grows. Stronger fault
tolerance capability is therefore desirable, but often
comes at high cost. In this paper, we propose a
low-cost, fault-aware, hardware-only resilience
mechanism, RelaxFault, that repairs the vast majority
of memory faults using a small amount of the LLC to
remap faulty memory locations. RelaxFault requires less
than 100KiB of LLC capacity, has near-zero impact on
performance and power. By repairing faults, RelaxFault
relaxes the requirement for high fault tolerance of
other mechanisms, such as ECC. A better tradeoff
between resilience and overhead is made by exploiting
an understanding of memory system architecture and
fault characteristics. We show that RelaxFault provides
better repair capability than prior work of similar
cost, improves memory reliability to a greater extent,
and significantly reduces the number of maintenance
events and memory module replacements. We also propose
a more refined memory fault model than prior work and
demonstrate its importance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Pothukuchi:2016:UMI,
author = "Raghavendra Pradyumna Pothukuchi and Amin Ansari and
Petros Voulgaris and Josep Torrellas",
title = "Using multiple input, multiple output formal control
to maximize resource efficiency in architectures",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "3",
pages = "658--670",
month = jun,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3007787.3001207",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Thu Jan 12 18:43:43 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As processors seek more resource efficiency, they
increasingly need to target multiple goals at the same
time, such as a level of performance, power
consumption, and average utilization. Robust control
solutions cannot come from heuristic-based controllers
or even from formal approaches that combine multiple
single-parameter controllers. Such controllers may
end-up working against each other. What is needed is
control-theoretical MIMO (multiple input, multiple
output) controllers, which actuate on multiple inputs
and control multiple outputs in a coordinated manner.
In this paper, we use MIMO control-theory techniques to
develop controllers to dynamically tune architectural
parameters in processors. To our knowledge, this is the
first work in this area. We discuss three ways in which
a MIMO controller can be used. We develop an example of
MIMO controller and show that it is substantially more
effective than controllers based on heuristics or built
by combining single-parameter formal controllers. The
general approach discussed here is likely to be
increasingly relevant as future processors become more
resource-constrained and adaptive.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ISCA '16 conference proceedings.",
}
@Article{Cherupalli:2016:EDT,
  author = "Hari Cherupalli and Rakesh Kumar and John Sartori",
  title = "Exploiting dynamic timing slack for energy efficiency
    in ultra-low-power embedded systems",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "3",
  pages = "671--681",
  month = jun,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3007787.3001208",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Thu Jan 12 18:43:43 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Many emerging applications such as the internet of
    things, wearables, and sensor networks have
    ultra-low-power requirements. At the same time, cost
    and programmability considerations dictate that many of
    these applications will be powered by general purpose
    embedded microprocessors and microcontrollers, not
    ASICs. In this paper, we exploit a new opportunity for
    improving energy efficiency in ultra-low-power
    processors expected to drive these applications ---
    dynamic timing slack. Dynamic timing slack exists when
    an embedded software application executed on a
    processor does not exercise the processor's static
    critical paths. In such scenarios, the longest path
    exercised by the application has additional timing
    slack which can be exploited for power savings at no
    performance cost by scaling down the processor's
    voltage at the same frequency until the longest
    exercised paths just meet timing constraints. Paths
    that cannot be exercised by an application can safely
    be allowed to violate timing constraints. We show that
    dynamic timing slack exists for many ultra-low-power
    applications and that exploiting dynamic timing slack
    can result in significant power savings for many
    ultra-low-power processors. We also present an
    automated methodology for identifying dynamic timing
    slack and selecting a safe operating point for a
    processor and a particular embedded software. Our
    approach for identifying and exploiting dynamic timing
    slack is non-speculative, requires no programmer
    intervention and little or no hardware support, and
    demonstrates potential power savings of up to 32\%,
    25\% on average, over a range of embedded applications
    running on a common ultra-low-power processor, at no
    performance cost.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "ISCA '16 conference proceedings.",
}
@Article{Zhou:2016:CSI,
  author = "Yanqi Zhou and Henry Hoffmann and David Wentzlaff",
  title = "{CASH}: supporting {IaaS} customers with a sub-core
    configurable architecture",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "3",
  pages = "682--694",
  month = jun,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3007787.3001209",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Thu Jan 12 18:43:43 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Infrastructure as a Service (IaaS) Clouds have grown
    increasingly important. Recent architecture designs
    support IaaS providers through fine-grain
    configurability, allowing providers to orchestrate
    low-level resource usage. Little work, however, has
    been devoted to supporting IaaS customers who must
    determine how to use such fine-grain configurable
    resources to meet quality-of-service (QoS) requirements
    while minimizing cost. This is a difficult problem
    because the multiplicity of configurations creates a
    non-convex optimization space. In addition, this
    optimization space may change as customer applications
    enter and exit distinct processing phases. In this
    paper, we overcome these issues by proposing CASH: a
    fine-grain configurable architecture co-designed with a
    cost-optimizing runtime system. The hardware
    architecture enables configurability at the granularity
    of individual ALUs and L2 cache banks and provides
    unique interfaces to support low-overhead, dynamic
    configuration and monitoring. The runtime uses a
    combination of control theory and machine learning to
    configure the architecture such that QoS requirements
    are met and cost is minimized. Our results demonstrate
    that the combination of fine-grain configurability and
    non-convex optimization provides tremendous cost
    savings (70\% savings) compared to coarse-grain
    heterogeneity and heuristic optimization. In addition,
    the system is able to customize configurations to
    particular applications, respond to application phases,
    and provide near optimal cost for QoS targets.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "ISCA '16 conference proceedings.",
}
@Article{Arjomand:2016:BAP,
  author = "Mohammad Arjomand and Mahmut T. Kandemir and Anand
    Sivasubramaniam and Chita R. Das",
  title = "Boosting access parallelism to {PCM}-based main
    memory",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "3",
  pages = "695--706",
  month = jun,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3007787.3001211",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Thu Jan 12 18:43:43 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Despite its promise as a DRAM main memory replacement,
    Phase Change Memory (PCM) has high write latencies
    which can be a serious detriment to its widespread
    adoption. Apart from slowing down a write request, the
    consequent high latency can also keep other chips of
    the same rank, that are not involved in this write,
    idle for long times. There are several practical
    considerations that make it difficult to allow
    subsequent reads and/or writes to be served
    concurrently from the same chips during the long
    latency write. This paper proposes and evaluates
    several novel mechanisms --- re-constructing data from
    error correction bits instead of waiting for chips
    currently busy to serve a read, rotating word mappings
    across chips of a PCM rank, and rotating the mapping of
    error detection/correction bits across these chips ---
    to overlap several reads with an ongoing write (RoW)
    and even a write with an ongoing write (WoW). The paper
    also presents the necessary micro-architectural
    enhancements needed to implement these mechanisms,
    without significantly changing the current interfaces.
    The resulting PCM access parallelism (PCMap) system
    incorporating these enhancements, boosts the
    intra-rank-level parallelism during such writes from a
    very low baseline value of 2.4 to an average and
    maximum values of 4.5 and 7.4, respectively (out of a
    maximum of 8.0), across a wide spectrum of both
    multiprogrammed and multithreaded workloads. This boost
    in parallelism results in an average IPC improvement of
    15.6\% and 16.7\% for the multiprogrammed and
    multithreaded workloads, respectively.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "ISCA '16 conference proceedings.",
}
@Article{Gandhi:2016:APE,
  author = "Jayneel Gandhi and Mark D. Hill and Michael M. Swift",
  title = "Agile paging: exceeding the best of nested and shadow
    paging",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "3",
  pages = "707--718",
  month = jun,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3007787.3001212",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Thu Jan 12 18:43:43 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Virtualization provides benefits for many workloads,
    but the overheads of virtualizing memory are not
    universally low. The cost comes from managing two
    levels of address translation---one in the guest
    virtual machine (VM) and the other in the host virtual
    machine monitor (VMM)---with either nested or shadow
    paging. Nested paging directly performs a two-level
    page walk that makes TLB misses slower than
    unvirtualized native, but enables fast page tables
    changes. Alternatively, shadow paging restores native
    TLB miss speeds, but requires costly VMM intervention
    on page table updates. This paper proposes agile paging
    that combines both techniques and exceeds the best of
    both. A virtualized page walk starts with shadow paging
    and optionally switches in the same page walk to nested
    paging where frequent page table updates would cause
    costly VMM interventions. Agile paging enables most TLB
    misses to be handled as fast as native while most page
    table changes avoid VMM intervention. It requires
    modest changes to hardware (e.g., demark when to
    switch) and VMM policies (e.g., predict good switching
    opportunities). We emulate the proposed hardware and
    prototype the software in Linux with KVM on x86-64.
    Agile paging performs more than 12\% better than the
    best of the two techniques and comes within 4\% of
    native execution for all workloads.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "ISCA '16 conference proceedings.",
}
@Article{Seol:2016:EED,
  author = "Hoseok Seol and Wongyu Shin and Jaemin Jang and
    Jungwhan Choi and Jinwoong Suh and Lee-Sup Kim",
  title = "Energy efficient data encoding in {DRAM} channels
    exploiting data value similarity",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "3",
  pages = "719--730",
  month = jun,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3007787.3001213",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Thu Jan 12 18:43:43 MST 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "As DRAM data bandwidth increases, tremendous energy is
    dissipated in the DRAM data bus. To reduce the energy
    consumed in the data bus, DRAM interfaces with
    asymmetric termination, such as Pseudo Open Drain (POD)
    and Low Voltage Swing Terminated Logic (LVSTL), have
    been adopted in modern DRAMs. In interfaces using
    asymmetric termination, the amount of termination
    energy is proportional to the hamming weight of the
    data words. In this work, we propose Bitwise Difference
    Encoding (BD-Encoding), which decreases the hamming
    weight of data words, leading to a reduction in energy
    consumption in the modern DRAM data bus. Since smaller
    hamming weight of the data words also reduces switching
    activity, switching energy and power noise are also
    both reduced. BD-Encoding exploits the similarity in
    data words in the DRAM data bus. We observed that
    similar data words (i.e. data words whose hamming
    distance is small) are highly likely to be sent over at
    similar times. Based on this observation, BD-coder
    stores the data recently sent over in both the memory
    controller and DRAMs. Then, BD-coder transfers the
    bitwise difference between the current data and the
    most similar data. In an evaluation using SPEC 2006,
    BD-Encoding using 64 recent data reduced termination
    energy by 58.3\% and switching energy by 45.3\%. In
    addition, 55\% of the LdI/dt noise was decreased with
    BD-Encoding.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "ISCA '16 conference proceedings.",
}
@Article{Sheng:2016:CCF,
  author = "Jiayi Sheng and Qingqing Xiong and Chen Yang and
    Martin C. Herbordt",
  title = "Collective Communication on {FPGA} Clusters with
    Static Scheduling",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "2--7",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039904",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "FPGA-centric clouds and clusters provide direct and
    programmable interconnects with obvious benefits for
    communication latency and bandwidth. One rarely studied
    aspect of DPI is that they facilitate application-aware
    routing: if communication patterns are static and known
    a priori, as is usually the case, then judicious
    routing can reduce congestion, latency, and the
    hardware required. In this study we explore applying
    the method of offline/static routing to collective
    operations, in particular, multicast and reduction. An
    entirely new communication infrastructure is proposed
    and implemented, including switch design and routing
    algorithm. A substantial improvement in performance is
    obtained, especially for multicast. We believe that
    this is one of the few general offline/static routing
    solutions for real HPC clusters, and FPGA-centric
    clusters in particular.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Mashimo:2016:CEH,
  author = "Susumu Mashimo and Thiem Van Chu and Kenji Kise",
  title = "Cost-Effective and High-Throughput Merge Network:
    Architecture for the Fastest {FPGA} Sorting
    Accelerator",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "8--13",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039905",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "High-performance sorting is used in various areas such
    as database transactions and genomic feature
    operations. To improve sorting performance, in addition
    to the conventional approach of using general purpose
    processors or GPUs, the approach of using FPGAs is
    becoming a promising solution. As an FPGA sorting
    accelerator, Casper and Olukotun have recently proposed
    the fastest one known so far. In their study, they
    proposed a merge network which can merge two sorted
    data series at a throughput of 6 data elements per
    200MHz clock cycle. If an FPGA sorting accelerator is
    constructed using merge networks, the overall
    throughput will be mainly determined by the throughputs
    of the merge networks. This motivates us to design a
    merge network which outputs more than 6 data elements
    per 200MHz clock cycle. In this paper, we propose a
    cost-effective and high-throughput merge network for
    the fastest FPGA sorting accelerator. The evaluation
    shows that our proposal achieves a throughput of 8 data
    elements per 200MHz clock cycle.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Pham-Quoc:2016:FBM,
  author = "Cuong Pham-Quoc and Biet Nguyen and Tran Ngoc Thinh",
  title = "{FPGA}-based Multicore Architecture for Integrating
    Multiple {DDoS} Defense Mechanisms",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "14--19",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039906",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "This paper proposes an FPGA-based multicore
    architecture to integrate multiple DDoS defense
    mechanisms for DDoS protection. The architecture allows
    multiple cooperating DDoS mitigation techniques to
    classify incoming network packets. The proposed
    architecture consists of two separate partitions static
    and dynamic. The static partition includes packet
    pre-processing and post-processing modules while the
    DDoS filtering techniques are implemented within the
    dynamic partition. These filtering techniques can be
    implemented by either hardware custom computing cores
    or general purpose soft processors or both. In all
    cases, these DDoS filtering computing cores can be
    updated or changed at runtime or design time. We
    implement our first prototype system with the Hop-count
    filtering and Ingress/Egress filtering techniques
    using the Xilinx Virtex 5 xc5vtx240t FPGA device. The
    synthesis results show that the system can work at up
    to 116.782MHz while utilizing about 41\% LUTs, 47\%
    Registers, and 53\% Block Memory of the available
    hardware resources. Experimental results show that our
    system achieves a 100\% detection rate (true positive)
    with a 0\% false negative rate and the maximum 0.74\%
    false positive rate. Moreover, the prototype system
    obtains packet processing throughput by up to 9.869
    Gbps in half-duplex mode and 19.738 Gbps in full-duplex
    mode.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Eslami:2016:IOM,
  author = "Fatemeh Eslami and Steven J. E. Wilton",
  title = "An Improved Overlay and Mapping Algorithm Supporting
    Rapid Triggering for {FPGA} Debug",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "20--25",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039907",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Embedded system designers can benefit from FPGA
    accelerators to achieve higher performance and
    efficiency. However, there are challenges that do not
    exist in software development; using software
    simulators to validate large and complex hardware
    designs can be extremely slow and impractical.
    Debugging designs implemented on an FPGA enables
    running the design at speed for long runs and more
    exhaustive test cases. However, limited observability
    is the primary challenge in hardware debug. To enhance
    hardware observability, trace-buffers and a trigger
    circuitry are inserted into the design. During the
    device operation, a history of signals of interest is
    recorded into the trace-buffers for off-line debug and
    validation. Recompiling the design every time the
    designer wishes to modify the trigger condition results
    in long debug turn-around times and reduced
    productivity. In this work, we present a
    pre-synthesized overlay fabric and algorithm to enable
    rapid triggering; during debug turn-around,
    TriggerPlus, a greedy algorithm, is used to implement a
    trigger circuit on the overlay. TriggerPlus is fast and
    simple, yet still capable of mapping the trigger
    circuit to the overlay fabric. We evaluate our
    techniques using VPR, showing that using our overlay
    and mapping algorithm together is at least an order of
    magnitude faster than the previous work resulting in a
    significant reduction in debug turn-around times.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Kobayashi:2016:HSV,
  author = "Ryohei Kobayashi and Tomohiro Misono and Kenji Kise",
  title = "A High-speed {Verilog} {HDL} Simulation Method using a
    Lightweight Translator",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "26--31",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039908",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Designing with Hardware Description Languages (HDLs)
    is still the de facto standard way to develop
    FPGA-based custom computing systems, and RTL simulation
    is an important step in ensuring that the designed
    hardware behavior meets the design specification. In
    this paper, we propose a new high-speed Verilog HDL
    simulation method. It is based on two previously
    proposed techniques: ArchHDL and Pyverilog. ArchHDL is
    used as a simulation engine in the method because the
    RTL simulation provided by ArchHDL can be parallelized
    with OpenMP. We use Pyverilog to develop a code
    translator to convert Verilog HDL source code into
    ArchHDL code, and due to this, the translator can be
    realized and its implementation is lightweight. We
    compare the proposed method with Synopsys VCS, and the
    experimental results show that the RTL simulation
    behavior and speed are same as that of Synopsys VCS and
    up to 5.8x better respectively.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Sassa:2016:FSP,
  author = "Shohei Sassa and Kenji Kanazawa and Shaowei Cai and
    Moritoshi Yasunaga",
  title = "An {FPGA} Solver for Partial {MaxSAT} Problems Based
    on Stochastic Local Search",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "32--37",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039909",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "In this paper, we propose an FPGA solver for partial
    maximum satisfiability (PMS) problems based on the Dist
    algorithm, which is one of the best performing
    stochastic local search algorithms for PMS problems.
    The Dist algorithm searches for a truth assignment for
    the variables that satisfies all of the hard clauses
    and as many soft clauses as possible by iteratively
    selecting a variable using a heuristic and flipping its
    truth value. During each iteration, new candidate
    variables for flipping are generated and existing ones
    may disappear. In our solver, the variables that may
    become new candidates for flipping are evaluated by
    parallel and pipeline processing, and then only the
    variables that actually become the candidates for
    flipping are extracted and gathered up in concurrent
    with the pipeline processing. The extraction process is
    not influenced by the number of the new candidates or
    their random generation, which minimizes the
    disturbance of the parallel and pipeline processing.
    Our FPGA solver can solve large PMS problems up to 7.74
    times faster than running Dist on CPU.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Houtgast:2016:EGI,
  author = "Ernst Joachim Houtgast and Vlad-Mihai Sima and Koen
    Bertels and Zaid Al-Ars",
  title = "An Efficient {GPU}-Accelerated Implementation of
    Genomic Short Read Mapping with {BWA-MEM}",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "38--43",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039910",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Next Generation Sequencing techniques have resulted in
    an exponential growth in the generation of genetics
    data, the amount of which will soon rival, if not
    overtake, other Big Data fields, such as astronomy and
    streaming video services. To become useful, this data
    requires processing by a complex pipeline of
    algorithms, taking multiple days even on large
    clusters. The mapping stage of such genomics pipelines,
    which maps the short reads onto a reference genome,
    takes up a significant portion of execution time.
    BWA-MEM is the de-facto industry-standard for the
    mapping stage. Here, a GPU-accelerated implementation
    of BWA-MEM is proposed. The Seed Extension phase, one
    of the three main BWA-MEM algorithm phases that
    requires between 30\%--50\% of overall processing time,
    is offloaded onto the GPU. A thorough design space
    analysis is presented for an optimized mapping of this
    phase onto the GPU. The resulting systolic-array based
    implementation obtains a two-fold overall
    application-level speedup, which is the maximum
    theoretically achievable speedup. Moreover, this
    speedup is sustained for systems with up to twenty-two
    logical cores. Based on the findings, a number of
    suggestions are made to improve GPU architecture,
    resulting in potentially greatly increased performance
    for bioinformatics-class algorithms.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Nakahara:2016:FCS,
  author = "Hiroki Nakahara and Hiroyuki Nakanishi and Kazumasa
    Iwai and Tsutomu Sasao",
  title = "An {FFT} Circuit for a Spectrometer of a Radio
    Telescope using the Nested {RNS} including the Constant
    Division",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "44--49",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039911",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "A radio telescope analyzes radio frequency (RF)
    received from celestial objects. It consists of an
    antenna, a receiver, and a spectrometer. The
    spectrometer converts the time domain into the
    frequency domain by an FFT operation. This paper
    applies an FFT circuit based on nested residue number
    system (NRNS), which recursively decompose the RNS. It
    can decompose the MAC unit into circuits with small
    sizes. In the FFT using the NRNS, a MAC unit is
    decomposed into 4-bit ones realized by look-up tables
    of the FPGA. Also, to realize the scaling (truncation)
    circuit, we propose a constant division algorithm on
    the FPGA. The truncation is realized by the division of
    a dynamic range for a subset of moduli. We implemented
    the proposed NRNS FFT on the Xilinx Inc. Virtex 6 FPGA.
    Compared with a Xilinx Inc. binary FFT library,
    although the number of block RAMs (BRAMs) was increased
    by 38\%, in the RNS FFT, the number of LUTs was
    decreased by 42--45\% and the maximum clock frequency
    was increased by 38--74\%. With this technique, we
    successfully implemented an FFT that satisfied the
    required size and speed specifications on an available
    FPGA, since the excessive number of LUTs was the
    bottleneck of the binary FFT.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Pangracious:2016:NTD,
  author = "Vinod Pangracious and Mulhim Al-Doori",
  title = "Novel Three-Dimensional Embedded {FPGA} Technology and
    Architecture",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "50--55",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039912",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "In this paper we present a high density
    three-dimensional (3D) interconnect network
    implementation based on a modified Mesh-of-Trees (MoT)
    topology for an embedded FPGA architecture design
    targeted for high performance 3D integration. To obtain
    the optimal MoT-based interconnect structure, the
    routing architecture of the 2D MoT-based FPGA is
    modified to include long routing segments that span
    multiple switch blocks in every row and column. By
    adjusting the percentage of long wire and span, a 2.5D
    or 3D high density MoT-based embedded FPGAs can be
    designed. For the 3D multi-stacked MoT-based FPGAs, the
    2D MoT-based FPGA is sliced into two or more equal
    sections by adjusting the length of the long wire span.
    The long wire segments are realized using 3D through
    silicon via (TSVs) and 2.5D interposer-based
    multi-FPGAs, we increase the number of cuts and apply
    appropriate optimization models to scale down the
    number of long wires and horizontal inter-FPGA
    interposer wires. Using our 2.5/3D CAD models, we
    demonstrate the speed and area of 3D MoT-based FPGA
    architecture improved by 54\% and 41\% respectively in
    comparison to 3D Mesh-based FPGAs.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Knodel:2016:MLR,
  author = "Oliver Knodel and Paul R. Genssler and Rainer G.
    Spallek",
  title = "Migration of long-running Tasks between Reconfigurable
    Resources using Virtualization",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "56--61",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039913",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
    https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract = "Computing performance and scalability are the
    essential basics in modern data centres. Field
    Programmable Gate Arrays (FPGAs) provide a promising
    opportunity to improve performance, security and energy
    efficiency. Especially background acceleration of
    computationally complex and long-running tasks is an
    important field of application. A flexible use of
    reconfigurable devices within a cloud context requires
    an abstraction of the actual hardware through
    virtualization. In this paper we present an approach
    inspired by paravirtualized machines for the
    integration of reconfigurable hardware into cloud
    services. Using partial reconfiguration our hardware
    and software framework virtualizes a single physical
    FPGA to enable multiple independent user designs.
    Essential components are the management of those
    virtual user-defined accelerators (vFPGA) and their
    migration between physical FPGAs to achieve higher
    system-wide utilization. The migration requires saving
    and restoring the internal state or context of the
    vFPGA. We demonstrate the application possibilities and
    the resource trade-off of our approach by transferring
    a running design from one physical FPGA to another.
    Moreover, we present future perspectives for the use of
    FPGAs in cloud-based environments.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  remark = "HEART '16 conference proceedings.",
}
@Article{Tada:2016:ESG,
  author = "Jubee Tada and Maiki Hosokawa and Ryusuke Egawa and
    Hiroaki Kobayashi",
  title = "Effects of Stacking Granularity on {$3$-D} Stacked
    Floating-point Fused Multiply Add Units",
  journal = j-COMP-ARCH-NEWS,
  volume = "44",
  number = "4",
  pages = "62--67",
  month = sep,
  year = "2016",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/3039902.3039914",
  ISSN = "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L = "0163-5964",
  bibdate = "Mon Jun 5 18:01:57 MDT 2017",
  bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
    https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "Three-dimensional stacked integrated circuits
    (3D-SICs) have been expected to overcome the
    limitations of conventional two-dimensional (2-D)
    implemented circuits. Since a stacking strategy affects
    the performance and the power consumption of 3D-SICs,
    this paper examines two stacking strategies for
    designing the 3-D stacked floating-point fused
    multiply-add (FP-FMA) module which contains four FP-FMA
    units. Experimental results show that a coarse-grain
    stacking strategy is suitable for reducing critical
    path delay of the 3-D stacked FP-FMA module. On the
    other hand, a fine-grain stacking strategy is suitable
    for reducing power consumption. The 3-D stacked FP-FMA
    module which is designed based on a fine-grain stacking
    strategy achieves an 8.4\% critical path delay
    reduction and an 18\% average power reduction compared
    with the 2-D implementation.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "https://dl.acm.org/loi/sigarch",
  keywords = "fused multiply-add (FMA) instruction",
  remark = "HEART '16 conference proceedings.",
}
@Article{Su:2016:NNB,
author = "Jiang Su and Jianxiong Liu and David B. Thomas and
Peter Y. K. Cheung",
title = "Neural Network Based Reinforcement Learning
Acceleration on {FPGA} Platforms",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "4",
pages = "68--73",
month = sep,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3039902.3039915",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:57 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Deep Q-learning (DQN) is a recently proposed
reinforcement learning algorithm where a neural network
is applied as a non-linear approximator to its value
function. The exploitation-exploration mechanism allows
the training and prediction of the NN to execute
simultaneously in an agent during its interaction with
the environment. Agents often act independently on
battery power, so the training and prediction must
occur within the agent and on a limited power budget.
In this work, we propose an FPGA acceleration system
design for Neural Network Q-learning (NNQL). Our
proposed system has high flexibility due to the support
to run-time network parameterization, which allows
neuroevolution algorithms to dynamically restructure
the network to achieve better learning results.
Additionally, the power consumption of our proposed
system is adaptive to the network size because of a new
processing element design. Based on our test cases on
networks with hidden layer size ranging from 32 to
16384, our proposed system achieves 7x to 346x speedup
compared to GPU implementation and 22x to 77x speedup
to hand-coded CPU counterpart.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '16 conference proceedings.",
}
@Article{DHollander:2016:HLS,
author = "Erik H. D'Hollander",
title = "High-Level Synthesis Optimization for Blocked
Floating-Point Matrix Multiplication",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "4",
pages = "74--79",
month = sep,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3039902.3039916",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:57 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In the last decade floating-point matrix
multiplication on FPGAs has been studied extensively
and efficient architectures as well as detailed
performance models have been developed. By design these
IP cores take a fixed footprint which does not necessarily
optimize the use of all available resources. Moreover,
the low-level architectures are not easily amenable to
a parameterized synthesis. In this paper high-level
synthesis is used to fine-tune the configuration
parameters in order to achieve the highest performance
with maximal resource utilization. An\ exploration
strategy is presented to optimize the use of critical
resources (DSPs, memory) for any given FPGA. To account
for the limited memory size on the FPGA, a
block-oriented matrix multiplication is organized such
that the block summation is done on the CPU while the
block multiplication occurs on the logic fabric
simultaneously. The communication overhead between the
CPU and the FPGA is minimized by streaming the blocks
in a Gray code ordering scheme which maximizes the data
reuse for consecutive block matrix product
calculations. Using high-level synthesis optimization,
the programmable logic operates at 93\% of the
theoretical peak performance and the combined CPU-FPGA
design achieves 76\% of the available hardware
processing speed for the floating-point multiplication
of 2K by 2K matrices.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '16 conference proceedings.",
}
@Article{Li:2016:FBV,
author = "Chengzhe Li and Lai Yoong Yee and Hiroshi Maruyama and
Yoshiki Yamaguchi",
title = "{FPGA}-based Volleyball Player Tracker",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "4",
pages = "80--86",
month = sep,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3039902.3039917",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:57 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The significant challenge facing sport science is how
to grasp the flow of the game and analyze the situation
of a match. The use of information technology will
facilitate to achieve the goal. The technical issues
from the practical application perspective can be
classified into three main points: computation speed,
system size and complex data analysis considering the
accuracy. In this paper, for accelerating image
recognition and object tracking, we propose a
one-dimensional data pipeline architecture on a
field-programmable gate array (FPGA). It satisfies both
of high-speed streaming computation and small-sized
circuits by considering spatiotemporal data dependence.
Volleyball games have been chosen as a target
application. The proposed system will identify the
position of six volleyball players within real time.
The design on an FPGA includes pre-processing, color
filtering, digitalization, noise reduction, template
matching, and so on. The design was implemented and
evaluated on Atlys Spartan-6 FPGA Trainer Board with
one XILINX Spartan-6 LX45 FPGA. The computational
performance achieves 100 frames per second at SVGA 800
by 600 pixel resolution. And our design has good
scalability; the performance can easily be enhanced
when the larger FPGA is used. The proposed system is
also compact, which is composed of one Atlys board and
one Atlys VmodCAM stereo-camera board. The
average-accuracy rates of pregame situation and during
a match are 87.1\% and 65.7\%, respectively. Since the
input is streaming data, we can improve the accuracy by
considering the previous and the next frames. They
could be improved to 90.4\% and 72.2\%, respectively,
when we adopt template matching with a moving average
filter.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '16 conference proceedings.",
}
@Article{Zhao:2016:SHC,
author = "Qian Zhao and Motoki Amagasaki and Masahiro Iida and
Morihiro Kuga and Toshinori Sueyoshi",
title = "A Study of Heterogeneous Computing Design Method based
on Virtualization Technology",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "4",
pages = "86--91",
month = sep,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3039902.3039918",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:57 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "One challenge for the heterogeneous computing with the
FPGA is how to bridge the development gap between SW
and HW designs. The high level synthesis (HLS)
technique allows producing hardware with high level
languages like C. Design tools based on the HLS like
Xilinx SDSoC and SDAccel are developed to speedup SW/HW
co-designs. However, the developers still require much
circuit design skills to use these tools more
efficiently. In this paper, we propose a heterogeneous
computing platform based on the virtualization
technology, namely hCODE. With the help of the
virtualization, the HW and SW design can be totally
separated. This brings multiple benefits like
accelerating a program without modifying or recompiling
it, enable high portability and scalability across
different HW and operating system.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '16 conference proceedings.",
}
@Article{Lin:2016:FHL,
author = "Colin Yu Lin and Zhenghong Jiang and Cheng Fu and
Hayden Kwok-Hay So and Haigang Yang",
title = "{FPGA} High-level Synthesis versus Overlay:
Comparisons on Computation Kernels",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "4",
pages = "92--97",
month = sep,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3039902.3039919",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:57 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To promote FPGA to a wider user community and to
increase design productivity, two new design
methodologies, namely FPGA high-level synthesis (HLS)
and FPGA overlay, are presented to use a high-level
design abstraction. To make clear the distinguishing features
of each design methodology, we make a comparison of a
state-of-the-art FPGA HLS tool, Vivado HLS, and an FPGA
overlay tool, ArchSyn, on two computation intensive
kernels, matrix-matrix multiplication and fast Fourier
transform. In the comparison, FPGA overlay shows an
overwhelming superiority in computation performance,
which is 8X to 39X faster than FPGA HLS. However, FPGA
HLS exhibits its advantages in dynamic power
consumption metric. It achieves up to 17X lower power
consumption than FPGA overlay. Power- and
energy-efficiency are another two essential metrics
evaluating trade-offs between performance and power
consumption. As demonstrated with evaluation results,
FPGA overlay is averagely 3.5X better in
power-efficiency for FFT kernel, and achieves up to 2
orders of magnitude better energy-efficiency than FPGA
HLS.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "HEART '16 conference proceedings.",
}
@Article{Zhan:2016:PMB,
author = "Xusheng Zhan and Yungang Bao and Christian Bienia and
Kai Li",
title = "{PARSEC3.0}: a Multicore Benchmark Suite with Network
Stacks and {SPLASH-2X}",
journal = j-COMP-ARCH-NEWS,
volume = "44",
number = "5",
pages = "1--16",
month = dec,
year = "2016",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3053277.3053279",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Benchmarks play a very important role in accelerating
the development and research of CMP. As one of them,
the PARSEC suite continues to be updated and revised
over and over again so that it can offer better support
for researchers. The former versions of PARSEC have
enough workloads to evaluate the property of CMP about
CPU, cache and memory, but it lacks of applications
based on network stack to assess the performance of
CMPs in respect of network. In this work, we introduce
PARSEC3.0, a new version of PARSEC suite that
implements a user-level network stack and generates
three network workloads with this stack to cover
network domain. We explore the input sets of splash-2
and expand them to multiple scales, a.k.a.\ splash-2x.
We integrate splash-2 and splash-2x into PARSEC
framework so that researchers use these benchmark suite
conveniently. Finally, we evaluate the u-TCP/IP stack
and new network workloads, and analyze the
characteristics of splash-2 and splash-2x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:2017:BDA,
author = "Yunji Chen",
title = "Big Data Analytics and Intelligence at Alibaba Cloud",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "1--1",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037699",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As China's largest cloud service provider, Alibaba
Cloud has been one of the fastest growing cloud
computing platforms in the world. In this talk, I'll
present an overview of Big Data and AI computing
platform at Alibaba Cloud, which consists of a wide
range of products and services to enable fast and
efficient big data development and intelligent
analysis. The underlying computing infrastructure
supports a variety of computation scenarios, including
batch, interactive, stream, and graph computation, as
well as large-scale machine learning on heterogeneous
cloud-scale data centers. Several big data products,
such as rule-based engine, recommendation system, BI
tools, etc., are provided to address different business
needs. The platform not only supports Alibaba's
internal businesses but also provides solid services to
enterprise customers. In addition, I'll describe key
techniques and system internals, and outline
outstanding research and engineering challenges.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Cherupalli:2017:DAS,
author = "Hari Cherupalli and Henry Duwe and Weidong Ye and
Rakesh Kumar and John Sartori",
title = "Determining Application-specific Peak Power and Energy
Requirements for Ultra-low Power Processors",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "3--16",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037711",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many emerging applications such as IoT, wearables,
implantables, and sensor networks are power- and
energy-constrained. These applications rely on
ultra-low-power processors that have rapidly become the
most abundant type of processor manufactured today. In
the ultra-low-power embedded systems used by these
applications, peak power and energy requirements are
the primary factors that determine critical system
characteristics, such as size, weight, cost, and
lifetime. While the power and energy requirements of
these systems tend to be application-specific,
conventional techniques for rating peak power and
energy cannot accurately bound the power and energy
requirements of an application running on a processor,
leading to over-provisioning that increases system size
and weight. In this paper, we present an automated
technique that performs hardware-software co-analysis
of the application and ultra-low-power processor in an
embedded system to determine application-specific peak
power and energy requirements. Our technique provides
more accurate, tighter bounds than conventional
techniques for determining peak power and energy
requirements, reporting 15\% lower peak power and 17\%
lower peak energy, on average, than a conventional
approach based on profiling and guardbanding. Compared
to an aggressive stressmark-based approach, our
technique reports power and energy bounds that are 26\%
and 26\% lower, respectively, on average. Also, unlike
conventional approaches, our technique reports
guaranteed bounds on peak power and energy independent
of an application's input set. Tighter bounds on peak
power and energy can be exploited to reduce system
size, weight, and cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Chen:2017:PPQ,
author = "Quan Chen and Hailong Yang and Minyi Guo and Ram
Srivatsa Kannan and Jason Mars and Lingjia Tang",
title = "{Prophet}: Precise {QoS} Prediction on Non-Preemptive
Accelerators to Improve Utilization in Warehouse-Scale
Computers",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "17--32",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037700",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Guaranteeing Quality-of-Service (QoS) of
latency-sensitive applications while improving server
utilization through application co-location is
important yet challenging in modern datacenters. The
key challenge is that when applications are co-located
on a server, performance interference due to resource
contention can be detrimental to the application QoS.
Although prior work has proposed techniques to identify
``safe'' co-locations where application QoS is
satisfied by predicting the performance interference on
multicores, no such prediction technique exists for
accelerators such as GPUs. In this work, we present
Prophet, an approach to precisely predict the
performance degradation of latency-sensitive
applications on accelerators due to application
co-location. We analyzed the performance interference
on accelerators through a real system investigation and
found that unlike on multicores where the key
contentious resources are shared caches and main memory
bandwidth, the key contentious resources on
accelerators are instead processing elements,
accelerator memory bandwidth and PCIe bandwidth. Based
on this observation, we designed interference models
that enable the precise prediction for processing
element, accelerator memory bandwidth and PCIe
bandwidth contention on real hardware. By using a novel
technique to forecast solo-run execution traces of the
co-located applications using interference models,
Prophet can accurately predict the performance
degradation of latency-sensitive applications on
non-preemptive accelerators. Using Prophet, we can
identify ``safe'' co-locations on accelerators to
improve utilization without violating the QoS target.
Our evaluation shows that Prophet can predict the
performance degradation with an average prediction
error 5.47\% on real systems. Meanwhile, based on the
prediction, Prophet achieves accelerator utilization
improvements of 49.9\% on average while maintaining the
QoS target of latency-sensitive applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Kanev:2017:MAM,
author = "Svilen Kanev and Sam Likun Xi and Gu-Yeon Wei and
David Brooks",
title = "{Mallacc}: Accelerating Memory Allocation",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "33--45",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037736",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Recent work shows that dynamic memory allocation
consumes nearly 7\% of all cycles in Google
datacenters. With the trend towards increased
specialization of hardware, we propose Mallacc, an
in-core hardware accelerator designed for broad use
across a number of high-performance, modern memory
allocators. The design of Mallacc is quite different
from traditional throughput-oriented hardware
accelerators. Because memory allocation requests tend
to be very frequent, fast, and interspersed inside
other application code, accelerators must be optimized
for latency rather than throughput and area overheads
must be kept to a bare minimum. Mallacc accelerates the
three primary operations of a typical memory allocation
request: size class computation, retrieval of a free
memory block, and sampling of memory usage. Our results
show that malloc latency can be reduced by up to 50\%
with a hardware cost of less than 1500 $\mu{\rm m}^2$ of silicon
area, less than 0.006\% of a typical high-performance
processor core.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Wen:2017:REV,
author = "Shasha Wen and Milind Chabbi and Xu Liu",
title = "{REDSPY}: Exploring Value Locality in Software",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "47--61",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037729",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Complex code bases with several layers of abstractions
have abundant inefficiencies that affect the execution
time. Value redundancy is a kind of inefficiency where
the same values are repeatedly computed, stored, or
retrieved over the course of execution. Not all
redundancies can be easily detected or eliminated with
compiler optimization passes due to the inherent
limitations of the static analysis. Microscopic
observation of whole executions at instruction- and
operand-level granularity breaks down abstractions and
helps recognize redundancies that masquerade in complex
programs. We have developed REDSPY---a fine-grained
profiler to pinpoint and quantify redundant operations
in program executions. Value redundancy may happen over
time at same locations or in adjacent locations, and
thus it has temporal and spatial locality. REDSPY
identifies both temporal and spatial value locality.
Furthermore, REDSPY is capable of identifying values
that are approximately the same, enabling optimization
opportunities in HPC codes that often use floating
point computations. REDSPY provides intuitive
optimization guidance by apportioning redundancies to
their provenance---source lines and execution calling
contexts. REDSPY pinpointed dramatically high volume of
redundancies in programs that were optimization targets
for decades, such as SPEC CPU2006 suite, Rodinia
benchmark, and NWChem---a production computational
chemistry code. Guided by REDSPY, we were able to
eliminate redundancies that resulted in significant
speedups.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Bhattacharjee:2017:TTP,
author = "Abhishek Bhattacharjee",
title = "Translation-Triggered Prefetching",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "63--76",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037705",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "We propose translation-enabled memory prefetching
optimizations or TEMPO, a low-overhead hardware
mechanism to boost memory performance by exploiting the
operating system's (OS) virtual memory subsystem. We
are the first to make the following observations: (1) a
substantial fraction (20--40\%) of DRAM references in
modern big-data workloads are devoted to accessing page
tables; and (2) when memory references require page
table lookups in DRAM, the vast majority of them
(98\%+) also look up DRAM for the subsequent data
access. TEMPO exploits these observations to enable
DRAM row-buffer and on-chip cache prefetching of the
data that page tables point to. TEMPO requires trivial
changes to the memory controller (under 3\% additional
area), no OS or application changes, and improves
performance by 10--30\% and energy by 1--14\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Kim:2017:TAA,
author = "Channoh Kim and Jaehyeok Kim and Sungmin Kim and
Dooyoung Kim and Namho Kim and Gitae Na and Young H. Oh
and Hyeon Gyu Cho and Jae W. Lee",
title = "Typed Architectures: Architectural Support for
Lightweight Scripting",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "77--90",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037726",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Dynamic scripting languages are becoming more and more
widely adopted not only for fast prototyping but also
for developing production-grade applications. They
provide high-productivity programming environments
featuring high levels of abstraction with powerful
built-in functions, automatic memory management,
object-oriented programming paradigm and dynamic
typing. However, their flexible, dynamic type systems
easily become the source of inefficiency in terms of
instruction count, memory footprint, and energy
consumption. This overhead makes it challenging to
deploy these high-productivity programming technologies
on emerging single-board computers for IoT
applications. Addressing this challenge, this paper
introduces Typed Architectures, a high-efficiency,
low-cost execution substrate for dynamic scripting
languages, where each data variable retains high-level
type information at an ISA level. Typed Architectures
calculate and check the dynamic type of each variable
implicitly in hardware, rather than explicitly in
software, hence significantly reducing instruction
count for dynamic type checking. Besides, Typed
Architectures introduce polymorphic instructions (e.g.,
xadd), which are bound to the correct native
instruction at runtime within the pipeline (e.g., add
or fadd) to efficiently implement polymorphic
operators. Finally, Typed Architectures provide
hardware support for flexible yet efficient type tag
extraction and insertion, capturing common data layout
patterns of tag-value pairs. Our evaluation using a
fully synthesizable RISC-V RTL design on FPGA shows
that Typed Architectures achieve geomean speedups of
11.2\% and 9.9\% with maximum speedups of 32.6\% and
43.5\% for two production-grade scripting engines for
JavaScript and Lua, respectively. Moreover, Typed
Architectures improve the energy-delay product (EDP) by
19.3\% for JavaScript and 16.5\% for Lua with an area
overhead of 1.6\% at a 40nm technology node.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Seo:2017:FAS,
author = "Jihye Seo and Wook-Hee Kim and Woongki Baek and
Beomseok Nam and Sam H. Noh",
title = "Failure-Atomic Slotted Paging for Persistent Memory",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "91--104",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037737",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The slotted-page structure is a database page format
commonly used for managing variable-length records. In
this work, we develop a novel ``failure-atomic slotted
page structure'' for persistent memory that leverages
byte addressability and durability of persistent memory
to minimize redundant write operations used to maintain
consistency in traditional database systems.
Failure-atomic slotted paging consists of two key
elements: (i) in-place commit per page using hardware
transactional memory and (ii) slot header logging that
logs the commit mark of each page. The proposed scheme
is implemented in SQLite and compared against NVWAL,
the current state-of-the-art scheme. Our performance
study shows that our failure-atomic slotted paging
shows optimal performance for database transactions
that insert a single record. For transactions that
touch more than one database page, our proposed
slot-header logging scheme minimizes the logging
overhead by avoiding duplicating pages and logging only
the metadata of the dirty pages. Overall, we find that
our failure-atomic slotted-page management scheme
reduces database logging overhead to 1/6 and improves
query response time by up to 33\% compared to NVWAL.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Nguyen:2017:WSP,
author = "Donald Nguyen and Keshav Pingali",
title = "What Scalable Programs Need from Transactional
Memory",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "105--118",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037750",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Transactional memory (TM) has been the focus of
numerous studies, and it is supported in processors
such as the IBM Blue Gene/Q and Intel Haswell. Many
studies have used the STAMP benchmark suite to evaluate
their designs. However, the speedups obtained for the
STAMP benchmarks on all TM systems we know of are quite
limited; for example, with 64 threads on the IBM Blue
Gene/Q, we observe a median speedup of 1.4X using the
Blue Gene/Q hardware transactional memory (HTM), and a
median speedup of 4.1X using a software transactional
memory (STM). What limits the performance of these
benchmarks on TMs? In this paper, we argue that the
problem lies with the programming model and data
structures used to write them. To make this point, we
articulate two principles that we believe must be
embodied in any scalable program and argue that STAMP
programs violate both of them. By modifying the STAMP
programs to satisfy both principles, we produce a new
set of programs that we call the Stampede suite. Its
median speedup on the Blue Gene/Q is 8.0X when using an
STM. The two principles also permit us to simplify the
TM design. Using this new STM with the Stampede
benchmarks, we obtain a median speedup of 17.7X with 64
threads on the Blue Gene/Q and 13.2X with 32 threads on
an Intel Westmere system. These results suggest that
HTM and STM designs will benefit if more attention is
paid to the division of labor between application
programs, systems software, and hardware.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Trippel:2017:TMM,
author = "Caroline Trippel and Yatin A. Manerkar and Daniel
Lustig and Michael Pellauer and Margaret Martonosi",
title = "{TriCheck}: Memory Model Verification at the
Trisection of Software, Hardware, and {ISA}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "119--133",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037719",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Memory consistency models (MCMs), which govern
inter-module interactions in a shared memory system,
are a significant, yet often under-appreciated, aspect
of system design. MCMs are defined at the various
layers of the hardware-software stack, requiring
thoroughly verified specifications, compilers, and
implementations at the interfaces between layers.
Current verification techniques evaluate segments of
the system stack in isolation, such as proving compiler
mappings from a high-level language (HLL) to an ISA or
proving validity of a microarchitectural implementation
of an ISA. This paper makes a case for full-stack MCM
verification and provides a toolflow, TriCheck, capable
of verifying that the HLL, compiler, ISA, and
implementation collectively uphold MCM requirements.
The work showcases TriCheck's ability to evaluate a
proposed ISA MCM in order to ensure that each layer and
each mapping is correct and complete. Specifically, we
apply TriCheck to the open source RISC-V ISA [55],
seeking to verify accurate, efficient, and legal
compilations from C11. We uncover under-specifications
and potential inefficiencies in the current RISC-V ISA
documentation and identify possible solutions for each.
As an example, we find that a RISC-V-compliant
microarchitecture allows 144 outcomes forbidden by C11
to be observed out of 1,701 litmus tests examined.
Overall, this paper demonstrates the necessity of
full-stack verification for detecting MCM-related bugs
in the hardware-software stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Nalli:2017:APM,
author = "Sanketh Nalli and Swapnil Haria and Mark D. Hill and
Michael M. Swift and Haris Volos and Kimberly Keeton",
title = "An Analysis of Persistent Memory Use with {WHISPER}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "135--148",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037730",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging non-volatile memory (NVM) technologies
promise durability with read and write latencies
comparable to volatile memory (DRAM). We define
Persistent Memory (PM) as NVM accessed with byte
addressability at low latency via normal memory
instructions. Persistent-memory applications ensure the
consistency of persistent data by inserting ordering
points between writes to PM allowing the construction
of higher-level transaction mechanisms. An epoch is a
set of writes to PM between ordering points. To put
systems research in PM on a firmer footing, we
developed and analyzed a PM benchmark suite called
WHISPER (Wisconsin-HP Labs Suite for Persistence) that
comprises ten PM applications we gathered to cover all
current interfaces to PM. A quantitative analysis
reveals several insights: (a) only 4\% of writes in
PM-aware applications are to PM and the rest are to
volatile memory, (b) software transactions are often
implemented with 5 to 50 ordering points (c) 75\% of
epochs update exactly one 64B cache line, (d) 80\% of
epochs from the same thread depend on previous epochs
from the same thread, while few epochs depend on epochs
from other threads. Based on our analysis, we propose
the Hands-off Persistence System (HOPS) to track
updates to PM in hardware. Current hardware design
requires applications to force data to PM as each epoch
ends. HOPS provides high-level ISA primitives for
applications to express durability and ordering
constraints separately and enforces them automatically,
while achieving 24.3\% better performance over current
approaches to persistence.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Zhang:2017:PPD,
author = "Tong Zhang and Changhee Jung and Dongyoon Lee",
title = "{ProRace}: Practical Data Race Detection for
Production Use",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "149--162",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037708",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper presents ProRace, a dynamic data race
detector practical for production runs. It is
lightweight, but still offers high race detection
capability. To track memory accesses, ProRace leverages
instruction sampling using the performance monitoring
unit (PMU) in commodity processors. Our PMU driver
enables ProRace to sample more memory accesses at a
lower cost compared to the state-of-the-art Linux
driver. Moreover, ProRace uses PMU-provided execution
contexts including register states and program path,
and reconstructs unsampled memory accesses offline.
This technique allows ProRace to overcome inherent
limitations of sampling and improve the detection
coverage by performing data race detection on the trace
with not only sampled but also reconstructed memory
accesses. Experiments using racy production software
including apache and mysql shows that, with a
reasonable offline cost, ProRace incurs only 2.6\%
overhead at runtime with 27.5\% detection probability
with a sampling period of 10,000.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Olson:2017:CGM,
author = "Lena E. Olson and Mark D. Hill and David A. Wood",
title = "Crossing Guard: Mediating Host-Accelerator Coherence
Interactions",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "163--176",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037715",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Specialized hardware accelerators have performance and
energy-efficiency advantages over general-purpose
processors. To fully realize these benefits and aid
programmability, accelerators may share a physical and
virtual address space and full cache coherence with the
host system. However, allowing accelerators ---
particularly those designed by third parties --- to
directly communicate with host coherence protocols
poses several problems. Host coherence protocols are
complex, vary between companies, and may be
proprietary, increasing burden on accelerator
designers. Bugs in the accelerator implementation may
cause crashes and other serious consequences to the
host system. We propose Crossing Guard, a coherence
interface between the host coherence system and
accelerators. The Crossing Guard interface provides the
accelerator designer with a standardized set of
coherence messages that are simple enough to aid in
design of bug-free coherent caches. At the same time,
they are sufficiently complex to allow customized and
optimized accelerator caches with performance
comparable to using the host protocol. The Crossing
Guard hardware is implemented as part of the trusted
host, and provides complete safety to the host
coherence system, even in the presence of a
pathologically buggy accelerator cache.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{McMahan:2017:ASF,
author = "Joseph McMahan and Michael Christensen and Lawton
Nichols and Jared Roesch and Sung-Yee Guo and Ben
Hardekopf and Timothy Sherwood",
title = "An Architecture Supporting Formal and Compositional
Binary Analysis",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "177--191",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037733",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Building a trustworthy life-critical embedded system
requires deep reasoning about the potential effects
that sequences of machine instructions can have on full
system operation. Rather than trying to analyze
complete binaries and the countless ways their
instructions can interact with one another --- memory,
side effects, control registers, implicit state, etc.
--- we explore a new approach. We propose an
architecture controlled by a thin computational layer
designed to tightly correspond with the lambda
calculus, drawing on principles of functional
programming to bring the assembly much closer to myriad
reasoning frameworks, such as the Coq proof assistant.
This approach allows assembly-level verified versions
of critical code to operate safely in tandem with
arbitrary code, including imperative and unverified
system components, without the need for large
supporting trusted computing bases. We demonstrate that
this computational layer can be built in such a way as
to simultaneously provide full programmability and
compact, precise, and complete semantics, while still
using hardware resources comparable to normal embedded
systems. To demonstrate the practicality of this
approach, our FPGA-implemented prototype runs an
embedded medical application which monitors and treats
life-threatening arrhythmias. Though the system
integrates untrusted and imperative components, our
architecture allows for the formal verification of
multiple properties of the end-to-end system, including
a proof of correctness of the assembly-level
implementation of the core algorithm, the integrity of
trusted data via a non-interference proof, and a
guarantee that our prototype meets critical timing
requirements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Hsiao:2017:ASI,
author = "Chun-Hung Hsiao and Satish Narayanasamy and Essam
Muhammad Idris Khan and Cristiano L. Pereira and Gilles
A. Pokam",
title = "{AsyncClock}: Scalable Inference of Asynchronous Event
Causality",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "193--205",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037712",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Asynchronous programming model is commonly used in
mobile systems and Web 2.0 environments. Asynchronous
race detectors use algorithms that are an order of
magnitude performance and space inefficient compared to
conventional data race detectors. We solve this problem
by identifying and addressing two important problems in
reasoning about causality between asynchronous events.
Unlike conventional signal-wait operations,
establishing causal order between two asynchronous
events is fundamentally more challenging as there is no
common handle they operate on. We propose a new
primitive named AsyncClock that addresses this problem
by explicitly tracking causally preceding events, and
show that AsyncClock can handle a wide variety of
asynchronous causality models. We also address the
important scalability problem of efficiently
identifying heirless events whose metadata can be
reclaimed. We built the first single-pass,
non-graph-based Android race detector using our
algorithm and applied it to find errors in 20 popular
applications. Our tool incurs about 6x performance
overhead, which is several times more efficient than
the state-of-the-art solution. It also scales well with
the execution length. We used our tool to find 147
previously unknown harmful races.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Calciu:2017:BBC,
author = "Irina Calciu and Siddhartha Sen and Mahesh
Balakrishnan and Marcos K. Aguilera",
title = "Black-box Concurrent Data Structures for {NUMA}
Architectures",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "207--221",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037721",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "High-performance servers are Non-Uniform Memory Access
(NUMA) machines. To fully leverage these machines,
programmers need efficient concurrent data structures
that are aware of the NUMA performance artifacts. We
propose Node Replication (NR), a black-box approach to
obtaining such data structures. NR takes an arbitrary
sequential data structure and automatically transforms
it into a NUMA-aware concurrent data structure
satisfying linearizability. Using NR requires no
expertise in concurrent data structure design, and the
result is free of concurrency bugs. NR draws ideas from
two disciplines: shared-memory algorithms and
distributed systems. Briefly, NR implements a
NUMA-aware shared log, and then uses the log to
replicate data structures consistently across NUMA
nodes. NR is best suited for contended data structures,
where it can outperform lock-free algorithms by 3.1x,
and lock-based solutions by 30x. To show the benefits
of NR to a real application, we apply NR to the data
structures of Redis, an in-memory storage system. The
result outperforms other methods by up to 14x. The cost
of NR is additional memory for its log and replicas.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Vora:2017:CCR,
author = "Keval Vora and Chen Tian and Rajiv Gupta and Ziang
Hu",
title = "{CoRAL}: Confined Recovery in Distributed Asynchronous
Graph Processing",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "223--236",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037747",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Existing distributed asynchronous graph processing
systems employ checkpointing to capture globally
consistent snapshots and rollback all machines to most
recent checkpoint to recover from machine failures. In
this paper we argue that recovery in distributed
asynchronous graph processing does not require the
entire execution state to be rolled back to a globally
consistent state due to the relaxed asynchronous
execution semantics. We define the properties required
in the recovered state for it to be usable for correct
asynchronous processing and develop CoRAL, a
lightweight checkpointing and recovery algorithm.
First, this algorithm carries out confined recovery
that only rolls back graph execution states of the
failed machines to affect recovery. Second, it relies
upon lightweight checkpoints that capture locally
consistent snapshots with a reduced peak network
bandwidth requirement. Our experiments using real-world
graphs show that our technique recovers from failures
and finishes processing 1.5x to 3.2x faster compared to
the traditional asynchronous checkpointing and recovery
mechanism when failures impact 1 to 6 machines of a 16
machine cluster. Moreover, capturing locally consistent
snapshots significantly reduces intermittent high peak
bandwidth usage required to save the snapshots --- the
average reduction in 99th percentile bandwidth ranges
from 22\% to 51\% while 1 to 6 snapshot replicas are
being maintained.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Vora:2017:KFA,
author = "Keval Vora and Rajiv Gupta and Guoqing Xu",
title = "{KickStarter}: Fast and Accurate Computations on
Streaming Graphs via Trimmed Approximations",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "237--251",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037748",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Continuous processing of a streaming graph maintains
an approximate result of the iterative computation on a
recent version of the graph. Upon a user query, the
accurate result on the current graph can be quickly
computed by feeding the approximate results to the
iterative computation --- a form of incremental
computation that corrects the (small amount of) error
in the approximate result. Despite the effectiveness of
this approach in processing growing graphs, it is
generally not applicable when edge deletions are
present --- existing approximations can lead to either
incorrect results (e.g., monotonic computations
terminate at an incorrect minima/maxima) or poor
performance (e.g., with approximations, convergence
takes longer than performing the computation from
scratch). This paper presents KickStarter, a runtime
technique that can trim the approximate values for a
subset of vertices impacted by the deleted edges. The
trimmed approximation is both safe and profitable,
enabling the computation to produce correct results and
converge quickly. KickStarter works for a class of
monotonic graph algorithms and can be readily
incorporated in any existing streaming graph system.
Our experiments with four streaming algorithms on five
large graphs demonstrate that trimming not only
produces correct results but also accelerates these
algorithms by 8.5--23.7x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Powers:2017:BBG,
author = "Bobby Powers and John Vilk and Emery D. Berger",
title = "{Browsix}: Bridging the Gap Between {Unix} and the
Browser",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "253--266",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037727",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Applications written to run on conventional operating
systems typically depend on OS abstractions like
processes, pipes, signals, sockets, and a shared file
system. Porting these applications to the web currently
requires extensive rewriting or hosting significant
portions of code server-side because browsers present a
nontraditional runtime environment that lacks OS
functionality. This paper presents Browsix, a framework
that bridges the considerable gap between conventional
operating systems and the browser, enabling unmodified
programs expecting a Unix-like environment to run
directly in the browser. Browsix comprises two core
parts: (1) a JavaScript-only system that makes core
Unix features (including pipes, concurrent processes,
signals, sockets, and a shared file system) available
to web applications; and (2) extended JavaScript
runtimes for C, C++, Go, and Node.js that support
running programs written in these languages as
processes in the browser. Browsix supports running a
POSIX shell, making it straightforward to connect
applications together via pipes. We illustrate
Browsix's capabilities via case studies that
demonstrate how it eases porting legacy applications to
the browser and enables new functionality. We
demonstrate a Browsix-enabled LaTeX editor that
operates by executing unmodified versions of pdfLaTeX
and BibTeX. This browser-only LaTeX editor can render
documents in seconds, making it fast enough to be
practical. We further demonstrate how Browsix lets us
port a client-server application to run entirely in the
browser for disconnected operation. Creating these
applications required less than 50 lines of glue code
and no code modifications, demonstrating how easily
Browsix can be used to build sophisticated web
applications from existing parts without
modification.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Rajbhandari:2017:OCM,
author = "Samyam Rajbhandari and Yuxiong He and Olatunji Ruwase
and Michael Carbin and Trishul Chilimbi",
title = "Optimizing {CNNs} on Multicores for Scalability,
Performance and Goodput",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "267--280",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037745",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Convolutional Neural Networks (CNN) are a class of
Artificial Neural Networks (ANN) that are highly
efficient at the pattern recognition tasks that
underlie difficult AI problems in a variety of domains,
such as speech recognition, object recognition, and
natural language processing. CNNs are, however,
computationally intensive to train. This paper presents
the first characterization of the performance
optimization opportunities for training CNNs on CPUs.
Our characterization includes insights based on the
structure of the network itself (i.e., intrinsic
arithmetic intensity of the convolution and its
scalability under parallelism) as well as dynamic
properties of its execution (i.e., sparsity of the
computation). Given this characterization, we present
an automatic framework called spg-CNN for optimizing
CNN training on CPUs. It comprises of a computation
scheduler for efficient parallel execution, and two
code generators: one that optimizes for sparsity, and
the other that optimizes for spatial reuse in
convolutions. We evaluate spg-CNN using convolutions
from a variety of real world benchmarks, and show that
spg-CNN can train CNNs faster than state-of-the-art
approaches by an order of magnitude.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Sundararajah:2017:LTN,
author = "Kirshanthan Sundararajah and Laith Sakka and Milind
Kulkarni",
title = "Locality Transformations for Nested Recursive
Iteration Spaces",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "281--295",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037720",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "There has been a significant amount of effort invested
in designing scheduling transformations such as loop
tiling and loop fusion that rearrange the execution of
dynamic instances of loop nests to place operations
that access the same data close together temporally. In
recent years, there has been interest in designing
similar transformations that operate on recursive
programs, but until now these transformations have only
considered simple scenarios: multiple recursions to be
fused, or a recursion nested inside a simple loop. This
paper develops the first set of scheduling
transformations for nested recursions: recursive
methods that call other recursive methods. These are
the recursive analog to nested loops. We present a
transformation called recursion twisting that
automatically improves locality at all levels of the
memory hierarchy, and show that this transformation can
yield substantial performance improvements across
several benchmarks that exhibit nested recursion.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Li:2017:LAC,
author = "Ang Li and Shuaiwen Leon Song and Weifeng Liu and Xu
Liu and Akash Kumar and Henk Corporaal",
title = "Locality-Aware {CTA} Clustering for Modern {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "297--311",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037709",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Cache is designed to exploit locality; however, the
role of on-chip L1 data caches on modern GPUs is often
awkward. The locality among global memory requests from
different SMs (Streaming Multiprocessors) is
predominantly harvested by the commonly-shared L2 with
long access latency; while the in-core locality, which
is crucial for performance delivery, is handled
explicitly by user-controlled scratchpad memory. In
this work, we disclose another type of data locality
that has been long ignored but with performance
boosting potential --- the inter-CTA locality.
Exploiting such locality is rather challenging due to
unclear hardware feasibility, unknown and inaccessible
underlying CTA scheduler, and small in-core cache
capacity. To address these issues, we first conduct a
thorough empirical exploration on various modern GPUs
and demonstrate that inter-CTA locality can be
harvested, both spatially and temporally, on L1 or
L1/Tex unified cache. Through further quantification
process, we prove the significance and commonality of
such locality among GPU applications, and discuss
whether such reuse is exploitable. By leveraging these
insights, we propose the concept of CTA-Clustering and
its associated software-based techniques to reshape the
default CTA scheduling in order to group the CTAs with
potential reuse together on the same SM. Our techniques
require no hardware modification and can be directly
deployed on existing GPUs. In addition, we incorporate
these techniques into an integrated framework for
automatic inter-CTA locality optimization. We evaluate
our techniques using a wide range of popular GPU
applications on all modern generations of NVIDIA GPU
architectures. The results show that our proposed
techniques significantly improve cache performance
through reducing L2 cache transactions by 55\%, 65\%,
29\%, 28\% on average for Fermi, Kepler, Maxwell and
Pascal, respectively, leading to an average of 1.46x,
1.48x, 1.45x, 1.41x (up to 3.8x, 3.6x, 3.1x, 3.3x)
performance speedups for applications with
algorithm-related inter-CTA reuse.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Churchill:2017:SLS,
author = "Berkeley Churchill and Rahul Sharma and JF Bastien and
Alex Aiken",
title = "Sound Loop Superoptimization for {Google Native
Client}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "313--326",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037754",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Software fault isolation (SFI) is an important
technique for the construction of secure operating
systems, web browsers, and other extensible software.
We demonstrate that superoptimization can dramatically
improve the performance of Google Native Client, a SFI
system that ships inside the Google Chrome Browser. Key
to our results are new techniques for superoptimization
of loops: we propose a new architecture for
superoptimization tools that incorporates both a fully
sound verification technique to ensure correctness and
a bounded verification technique to guide the search to
optimized code. In our evaluation we optimize 13 libc
string functions, formally verify the correctness of
the optimizations and report a median and average
speedup of 25\% over the libraries shipped by Google.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Bianchini:2017:IDE,
author = "Ricardo Bianchini",
title = "Improving Datacenter Efficiency",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "327--327",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3046426",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Internet companies can improve datacenter efficiency
and reduce costs, by minimizing resource waste while
avoiding (or limiting) performance degradation. In this
talk, I will first overview a few of the
efficiency-related efforts we are undertaking at
Microsoft, including leveraging workload history to
improve resource management. I will then discuss some
lessons from deploying these efforts in production and
how they relate to academic research.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Liu:2017:DBD,
author = "Mengxing Liu and Mingxing Zhang and Kang Chen and
Xuehai Qian and Yongwei Wu and Weimin Zheng and Jinglei
Ren",
title = "{DudeTM}: Building Durable Transactions with
Decoupling for Persistent Memory",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "329--343",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037714",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Emerging non-volatile memory (NVM) offers
non-volatility, byte-addressability and fast access at
the same time. To make the best use of these
properties, it has been shown by empirical evidence
that programs should access NVM directly through CPU
load and store instructions, so that the overhead of a
traditional file system or database can be avoided.
Thus, durable transactions become a common choice of
applications for accessing persistent memory data in a
crash consistent manner. However, existing durable
transaction systems employ either undo logging, which
requires a fence for every memory write, or redo
logging, which requires intercepting all memory reads
within transactions. This paper presents DUDETM, a
crash-consistent durable transaction system that avoids
the drawbacks of both undo logging and redo logging.
DUDETM uses shadow DRAM to decouple the execution of a
durable transaction into three fully asynchronous
steps. The advantage is that only minimal fences and no
memory read instrumentation are required. This design
also enables an out-of-the-box transactional memory
(TM) to be used as an independent component in our
system. The evaluation results show that DUDETM adds
durability to a TM system with only 7.4 $ \sim $ 24.6\%
throughput degradation. Compared to the existing
durable transaction systems, DUDETM provides $ 1.7 \times $
to $ 4.4 \times $ higher throughput. Moreover, DUDETM can be
implemented with existing hardware TMs with minor
hardware modifications, leading to a further $ 1.7 \times $
speedup.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Klimovic:2017:RRF,
author = "Ana Klimovic and Heiner Litz and Christos Kozyrakis",
title = "{ReFlex}: Remote Flash $ \approx $ Local Flash",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "1",
pages = "345--359",
month = mar,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3093337.3037732",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Mon Jun 5 18:01:58 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Remote access to NVMe Flash enables flexible scaling
and high utilization of Flash capacity and IOPS within
a datacenter. However, existing systems for remote
Flash access either introduce significant performance
overheads or fail to isolate the multiple remote
clients sharing each Flash device. We present ReFlex, a
software-based system for remote Flash access, that
provides nearly identical performance to accessing
local Flash. ReFlex uses a dataplane kernel to closely
integrate networking and storage processing to achieve
low latency and high throughput at low resource
requirements. Specifically, ReFlex can serve up to 850K
IOPS per core over TCP/IP networking, while adding 21us
over direct access to local Flash. ReFlex uses a QoS
scheduler that can enforce tail latency and throughput
service-level objectives (SLOs) for thousands of remote
clients. We show that ReFlex allows applications to use
remote Flash while maintaining their original
performance with local Flash.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
remark = "ASPLOS'17 conference proceedings",
}
@Article{Jevdjic:2017:ASC,
  author =       "Djordje Jevdjic and Karin Strauss and Luis Ceze and
                 Henrique S. Malvar",
  title =        "Approximate Storage of Compressed and Encrypted
                 Videos",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "361--373",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037718",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The popularization of video capture devices has
                 created strong storage demand for encoded videos.
                 Approximate storage can ease this demand by enabling
                 denser storage at the expense of occasional errors.
                 Unfortunately, even minor storage errors, such as bit
                 flips, can result in major visual damage in encoded
                 videos. Similarly, video encryption, widely employed
                 for privacy and digital rights management, may create
                 long dependencies between bits that show little or no
                 tolerance to storage errors. In this paper we propose
                 VideoApp, a novel and efficient methodology to compute
                 bit-level reliability requirements for encoded videos
                 by tracking visual and metadata dependencies within
                 encoded bitstreams. We further show how VideoApp can be
                 used to trade video quality for storage density in an
                 optimal way. We integrate our methodology into a
                 popular H.264 encoder to partition an encoded video
                 stream into multiple streams that can receive different
                 levels of error correction according to their
                 reliability needs. When applied to a dense and highly
                 error-prone multi-level cell storage substrate, our
                 variable error correction mechanism reduces the error
                 correction overhead by half under the most
                 error-intolerant encoder settings, achieving
                 quality/density points that neither compression nor
                 approximation can achieve alone. Finally, we define the
                 basic invariants needed to support encrypted
                 approximate video storage. We present an analysis of
                 block cipher modes of operation, showing that some are
                 fully compatible with approximation, enabling
                 approximate and secure video storage systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Elyasi:2017:EIR,
  author =       "Nima Elyasi and Mohammad Arjomand and Anand
                 Sivasubramaniam and Mahmut T. Kandemir and Chita R. Das
                 and Myoungsoo Jung",
  title =        "Exploiting Intra-Request Slack to Improve {SSD}
                 Performance",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "375--388",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037728",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "With Solid State Disks (SSDs) offering high degrees of
                 parallelism, SSD controllers place data and direct
                 requests to exploit the maximum offered hardware
                 parallelism. In the quest to maximize parallelism and
                 utilization, sub-requests of a request that are
                 directed to different flash chips by the scheduler can
                 experience differential wait times since their
                 individual queues are not coordinated and load balanced
                 at all times. Since the macro request is considered
                 complete only when its last sub-request completes, some
                 of its sub-requests that complete earlier have to
                 necessarily wait for this last sub-request. This paper
                 opens the door to a new class of schedulers to leverage
                 such slack between sub-requests in order to improve
                 response times. Specifically, the paper presents the
                 design and implementation of a slack-enabled
                 re-ordering scheduler, called Slacker, for sub-requests
                 issued to each flash chip. Layered under a modern SSD
                 request scheduler, Slacker estimates the slack of each
                 incoming sub-request to a flash chip and allows them to
                 jump ahead of existing sub-requests with sufficient
                 slack so as to not detrimentally impact their response
                 times. Slacker is simple to implement and imposes only
                 marginal additions to the hardware. Using a spectrum of
                 21 workloads with diverse read-write characteristics,
                 we show that Slacker provides as much as 19.5\%, 13\%
                 and 14.5\% improvement in response times, with average
                 improvements of 12\%, 6.5\% and 8.5\%, for
                 write-intensive, read-intensive and read-write balanced
                 workloads, respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Wang:2017:GSM,
  author =       "Kai Wang and Aftab Hussain and Zhiqiang Zuo and
                 Guoqing Xu and Ardalan Amiri Sani",
  title =        "{Graspan}: a Single-machine Disk-based Graph System
                 for Interprocedural Static Analyses of Large-scale
                 Systems Code",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "389--404",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037744",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "There is more than a decade-long history of using
                 static analysis to find bugs in systems such as Linux.
                 Most of the existing static analyses developed for
                 these systems are simple checkers that find bugs based
                 on pattern matching. Despite the presence of many
                 sophisticated interprocedural analyses, few of them
                 have been employed to improve checkers for systems code
                 due to their complex implementations and poor
                 scalability. In this paper, we revisit the scalability
                 problem of interprocedural static analysis from a ``Big
                 Data'' perspective. That is, we turn sophisticated code
                 analysis into Big Data analytics and leverage novel
                 data processing techniques to solve this traditional
                 programming language problem. We develop Graspan, a
                 disk-based parallel graph system that uses an edge-pair
                 centric computation model to compute dynamic transitive
                 closures on very large program graphs. We implement
                 context-sensitive pointer/alias and dataflow analyses
                 on Graspan. An evaluation of these analyses on large
                 codebases such as Linux shows that their Graspan
                 implementations scale to millions of lines of code and
                 are much simpler than their original implementations.
                 Moreover, we show that these analyses can be used to
                 augment the existing checkers; these augmented checkers
                 uncovered 132 new NULL pointer bugs and 1308
                 unnecessary NULL tests in Linux 4.4.0-rc5, PostgreSQL
                 8.3.9, and Apache httpd 2.2.18.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Ren:2017:SDH,
  author =       "Ao Ren and Zhe Li and Caiwen Ding and Qinru Qiu and
                 Yanzhi Wang and Ji Li and Xuehai Qian and Bo Yuan",
  title =        "{SC-DCNN}: Highly-Scalable Deep Convolutional Neural
                 Network using Stochastic Computing",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "405--418",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037746",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "With the recent advance of wearable devices and
                 Internet of Things (IoTs), it becomes attractive to
                 implement the Deep Convolutional Neural Networks
                 (DCNNs) in embedded and portable systems. Currently,
                 executing the software-based DCNNs requires
                 high-performance servers, restricting the widespread
                 deployment on embedded and mobile IoT devices. To
                 overcome this obstacle, considerable research efforts
                 have been made to develop highly-parallel and
                 specialized DCNN accelerators using GPGPUs, FPGAs or
                 ASICs. Stochastic Computing (SC), which uses a
                 bit-stream to represent a number within $ [-1, 1] $ by
                 counting the number of ones in the bit-stream, has high
                 potential for implementing DCNNs with high scalability
                 and ultra-low hardware footprint. Since multiplications
                 and additions can be calculated using AND gates and
                 multiplexers in SC, significant reductions in power
                 (energy) and hardware footprint can be achieved
                 compared to the conventional binary arithmetic
                 implementations. The tremendous savings in power
                 (energy) and hardware resources allow immense design
                 space for enhancing scalability and robustness for
                 hardware DCNNs. This paper presents SC-DCNN, the first
                 comprehensive design and optimization framework of
                 SC-based DCNNs, using a bottom-up approach. We first
                 present the designs of function blocks that perform the
                 basic operations in DCNN, including inner product,
                 pooling, and activation function. Then we propose four
                 designs of feature extraction blocks, which are in
                 charge of extracting features from input feature maps,
                 by connecting different basic function blocks with
                 joint optimization. Moreover, the efficient weight
                 storage methods are proposed to reduce the area and
                 power (energy) consumption. Putting all together, with
                 feature extraction blocks carefully selected, SC-DCNN
                 is holistically optimized to minimize area and power
                 (energy) consumption while maintaining high network
                 accuracy. Experimental results demonstrate that the
                 LeNet5 implemented in SC-DCNN consumes only 17 mm$^2$
                 area and 1.53 W power, achieves throughput of 781250
                 images/s, area efficiency of 45946 images/s/mm$^2$,
                 and energy efficiency of 510734 images/J.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Ajay:2017:GIL,
  author =       "Jerry Ajay and Chen Song and Aditya Singh Rathore and
                 Chi Zhou and Wenyao Xu",
  title =        "{$3$DGates}: an Instruction-Level Energy Analysis and
                 Optimization of {$3$D} Printers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "419--433",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037752",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "As the next-generation manufacturing driven force, 3D
                 printing technology is having a transformative effect
                 on various industrial domains and has been widely
                 applied in a broad spectrum of applications. It also
                 progresses towards other versatile fields with portable
                 battery-powered 3D printers working on a limited energy
                 budget. While reducing manufacturing energy is an
                 essential challenge in industrial sustainability and
                 national economics, this growing trend motivates us to
                 explore the energy consumption of the 3D printer for
                 the purpose of energy efficiency. To this end, we
                 perform an in-depth analysis of energy consumption in
                 commercial, off-the-shelf 3D printers from an
                 instruction-level perspective. We build an
                 instruction-level energy model and an energy profiler
                 to analyze the energy cost during the fabrication
                 process. From the insights obtained by the energy
                 profiler, we propose and implement a cross-layer energy
                 optimization solution, called 3DGates, which spans the
                 instruction-set, the compiler and the firmware. We
                 evaluate 3DGates over 338 benchmarks on a 3D printer
                 and achieve an overall energy reduction of 25\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Cox:2017:EAT,
  author =       "Guilherme Cox and Abhishek Bhattacharjee",
  title =        "Efficient Address Translation for Architectures with
                 Multiple Page Sizes",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "435--448",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037704",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Processors and operating systems (OSes) support
                 multiple memory page sizes. Superpages increase
                 Translation Lookaside Buffer (TLB) hits, while small
                 pages provide fine-grained memory protection. Ideally,
                 TLBs should perform well for any distribution of page
                 sizes. In reality, set-associative TLBs --- used
                 frequently for their energy efficiency compared to
                 fully-associative TLBs --- cannot (easily) support
                 multiple page sizes concurrently. Instead, commercial
                 systems typically implement separate set-associative
                 TLBs for different page sizes. This means that when
                 superpages are allocated aggressively, TLB misses may,
                 counterintuitively, increase even if entries for small
                 pages remain unused (and vice-versa). We invent MIX
                 TLBs, energy-frugal set-associative structures that
                 concurrently support all page sizes by exploiting
                 superpage allocation patterns. MIX TLBs boost the
                 performance (often by 10--30\%) of big-memory
                 applications on native CPUs, virtualized CPUs, and
                 GPUs. MIX TLBs are simple and require no OS or program
                 changes.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Lesokhin:2017:PFS,
  author =       "Ilya Lesokhin and Haggai Eran and Shachar Raindel and
                 Guy Shapiro and Sagi Grimberg and Liran Liss and Muli
                 Ben-Yehuda and Nadav Amit and Dan Tsafrir",
  title =        "Page Fault Support for Network Controllers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "449--466",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037710",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Direct network I/O allows network controllers (NICs)
                 to expose multiple instances of themselves, to be used
                 by untrusted software without a trusted intermediary.
                 Direct I/O thus frees researchers from legacy software,
                 fueling studies that innovate in multitenant setups.
                 Such studies, however, overwhelmingly ignore one
                 serious problem: direct memory accesses (DMAs) of NICs
                 disallow page faults, forcing systems to either pin
                 entire address spaces to physical memory and thereby
                 hinder memory utilization, or resort to APIs that
                 pin/unpin memory buffers before/after they are DMAed,
                 which complicates the programming model and hampers
                 performance. We solve this problem by designing and
                 implementing page fault support for InfiniBand and
                 Ethernet NICs. A main challenge we tackle---unique to
                 NICs---is handling receive DMAs that trigger page
                 faults, leaving the NIC without memory to store the
                 incoming data. We demonstrate that our solution
                 provides all the benefits associated with ``regular''
                 virtual memory, notably (1) a simpler programming model
                 that rids users from the need to pin, and (2) the
                 ability to employ all the canonical memory
                 optimizations, such as memory overcommitment and
                 demand-paging based on actual use. We show that, as a
                 result, benchmark performance improves by up to 1.9x.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Hu:2017:TFC,
  author =       "Yang Hu and Mingcong Song and Tao Li",
  title =        "Towards {``Full Containerization''} in Containerized
                 Network Function Virtualization",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "467--481",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037713",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "With exploding traffic stuffing existing network
                 infrastructure, today's telecommunication and cloud
                 service providers resort to Network Function
                 Virtualization (NFV) for greater agility and economics.
                 Pioneer service provider such as AT{\&}T proposes to
                 adopt container in NFV to achieve shorter Virtualized
                 Network Function (VNF) provisioning time and better
                 runtime performance. However, we characterize typical
                 NFV workloads on the containers and find that the
                 performance is unsatisfactory. We observe that the
                 shared host OS network stack is the main bottleneck,
                 where the traffic flow processing involves a large
                 amount of intermediate memory buffers and results in
                 significant last level cache pollution. Existing OS
                 memory allocation policies fail to exploit the locality
                 and data sharing information among buffers. In this
                 paper, we propose NetContainer, a software framework
                 that achieves fine-grained hardware resource management
                 for containerized NFV platform. NetContainer employs a
                 cache access overheads guided page coloring scheme to
                 coordinately address the inter-flow cache access
                 overheads and intra-flow cache access overheads. It
                 maps the memory buffer pages that manifest low cache
                 access overheads (across a flow or among the flows) to
                 the same last level cache partition. NetContainer
                 exploits a footprint theory based method to estimate
                 the cache access overheads and a Min-Cost Max-Flow
                 model to guide the memory buffer mappings. We implement
                 the NetContainer in Linux kernel and extensively
                 evaluate it with real NFV workloads. Experimental
                 results show that NetContainer outperforms conventional
                 page coloring-based memory allocator by 48\% in terms
                 of successful call rate.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Wu:2017:FEF,
  author =       "Bo Wu and Xu Liu and Xiaobo Zhou and Changjun Jiang",
  title =        "{FLEP}: Enabling Flexible and Efficient Preemption on
                 {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "483--496",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037742",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "GPUs are widely adopted in HPC and cloud computing
                 platforms to accelerate general-purpose workloads.
                 However, modern GPUs do not support flexible
                 preemption, leading to performance and priority
                 inversion problems in multi-tasking environments. In
                 this paper, we propose and develop FLEP, the first
                 software system that enables flexible kernel preemption
                 and kernel scheduling on commodity GPUs. The FLEP
                 compilation engine transforms the GPU program into
                 preemptable forms, which can be interrupted during
                 execution and yield all or part of the streaming
                 multi-processors (SMs) in the GPU. The FLEP runtime
                 engine intercepts all kernel invocations and determines
                 which kernels and how those kernels should be preempted
                 and scheduled. Experimental results on two-kernel
                 co-runs demonstrate up to 24.2X speedup for
                 high-priority kernels and up to 27X improvement on
                 normalized average turnaround time for kernels with the
                 same priority. FLEP reduces the preemption latency by
                 up to 41\% compared to yielding the whole GPU when the
                 waiting kernels only need several SMs. With all the
                 benefits, FLEP only introduces 2.5\% runtime overhead,
                 which is substantially lower than the kernel slicing
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Li:2017:SSA,
  author =       "Kaiwei Li and Jianfei Chen and Wenguang Chen and Jun
                 Zhu",
  title =        "{SaberLDA}: Sparsity-Aware Learning of Topic Models on
                 {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "497--509",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037740",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Latent Dirichlet Allocation (LDA) is a popular tool
                 for analyzing discrete count data such as text and
                 images. Applications require LDA to handle both large
                 datasets and a large number of topics. Though
                 distributed CPU systems have been used, GPU-based
                 systems have emerged as a promising alternative because
                 of the high computational power and memory bandwidth of
                 GPUs. However, existing GPU-based LDA systems cannot
                 support a large number of topics because they use
                 algorithms on dense data structures whose time and
                 space complexity is linear to the number of topics. In
                 this paper, we propose SaberLDA, a GPU-based LDA system
                 that implements a sparsity-aware algorithm to achieve
                 sublinear time complexity and scales well to learn a
                 large number of topics. To address the challenges
                 introduced by sparsity, we propose a novel data layout,
                 a new warp-based sampling kernel, and an efficient
                 sparse count matrix updating algorithm that improves
                 locality, makes efficient utilization of GPU warps, and
                 reduces memory consumption. Experiments show that
                 SaberLDA can learn from billions-token-scale data with
                 up to 10,000 topics, which is almost two orders of
                 magnitude larger than that of the previous GPU-based
                 systems. With a single GPU card, SaberLDA is able to
                 learn 10,000 topics from a dataset of billions of
                 tokens in a few hours, which is only achievable with
                 clusters with tens of machines before.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Khazraee:2017:MNO,
  author =       "Moein Khazraee and Lu Zhang and Luis Vega and Michael
                 Bedford Taylor",
  title =        "{Moonwalk}: {NRE} Optimization in {ASIC} Clouds",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "511--526",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037749",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Cloud services are becoming increasingly globalized
                 and data-center workloads are expanding exponentially.
                 GPU and FPGA-based clouds have illustrated improvements
                 in power and performance by accelerating
                 compute-intensive workloads. ASIC-based clouds are a
                 promising way to optimize the Total Cost of Ownership
                 (TCO) of a given datacenter computation (e.g. YouTube
                 transcoding) by reducing both energy consumption and
                 marginal computation cost. The feasibility of an ASIC
                 Cloud for a particular application is directly gated by
                 the ability to manage the Non-Recurring Engineering
                 (NRE) costs of designing and fabricating the ASIC, so
                 that it is significantly lower (e.g. 2X) than the TCO
                 of the best available alternative. In this paper, we
                 show that technology node selection is a major tool for
                 managing ASIC Cloud NRE, and allows the designer to
                 trade off an accelerator's excess energy efficiency and
                 cost performance for lower total cost. We explore NRE
                 and cross-technology optimization of ASIC Clouds for
                 four different applications: Bitcoin mining,
                 YouTube-style video transcoding, Litecoin, and Deep
                 Learning. We address these challenges and show large
                 reductions in the NRE, potentially enabling ASIC Clouds
                 to address a wider variety of datacenter workloads. Our
                 results suggest that advanced nodes like 16nm will lead
                 to sub-optimal TCO for many workloads, and that use of
                 older nodes like 65nm can enable a greater diversity of
                 ASIC Clouds.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Park:2017:DRM,
  author =       "Jason Jong Kyu Park and Yongjun Park and Scott
                 Mahlke",
  title =        "Dynamic Resource Management for Efficient Utilization
                 of Multitasking {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "527--540",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037707",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "As graphics processing units (GPUs) are broadly
                 adopted, running multiple applications on a GPU at the
                 same time is beginning to attract wide attention.
                 Recent proposals on multitasking GPUs have focused on
                 either spatial multitasking, which partitions GPU
                 resource at a streaming multiprocessor (SM)
                 granularity, or simultaneous multikernel (SMK), which
                 runs multiple kernels on the same SM. However,
                 multitasking performance varies heavily depending on
                 the resource partitions within each scheme, and the
                 application mixes. In this paper, we propose GPU
                 Maestro that performs dynamic resource management for
                 efficient utilization of multitasking GPUs. GPU Maestro
                 can discover the best performing GPU resource partition
                 exploiting both spatial multitasking and SMK.
                 Furthermore, dynamism within a kernel and interference
                 between the kernels are automatically considered
                 because GPU Maestro finds the best performing partition
                 through direct measurements. Evaluations show that GPU
                 Maestro can improve average system throughput by 20.2\%
                 and 13.9\% over the baseline spatial multitasking and
                 SMK, respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Zhang:2017:ISC,
  author =       "Rui Zhang and Natalie Stanley and Christopher Griggs
                 and Andrew Chi and Cynthia Sturton",
  title =        "Identifying Security Critical Properties for the
                 Dynamic Verification of a Processor",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "541--554",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037734",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We present a methodology for identifying security
                 critical properties for use in the dynamic verification
                 of a processor. Such verification has been shown to be
                 an effective way to prevent exploits of vulnerabilities
                 in the processor, given a meaningful set of security
                 properties. We use known processor errata to establish
                 an initial set of security-critical invariants of the
                 processor. We then use machine learning to infer an
                 additional set of invariants that are not tied to any
                 particular, known vulnerability, yet are critical to
                 security. We build a tool chain implementing the
                 approach and evaluate it for the open-source OR1200
                 RISC processor. We find that our tool can identify 19
                 (86.4\%) of the 22 manually crafted security-critical
                 properties from prior work and generates 3 new security
                 properties not covered in prior work.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Ferraiuolo:2017:VPH,
  author =       "Andrew Ferraiuolo and Rui Xu and Danfeng Zhang and
                 Andrew C. Myers and G. Edward Suh",
  title =        "Verification of a Practical Hardware Security
                 Architecture Through Static Information Flow Analysis",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "555--568",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037739",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Hardware-based mechanisms for software isolation are
                 becoming increasingly popular, but implementing these
                 mechanisms correctly has proved difficult, undermining
                 the root of security. This work introduces an effective
                 way to formally verify important properties of such
                 hardware security mechanisms. In our approach, hardware
                 is developed using a lightweight security-typed
                 hardware description language (HDL) that performs
                 static information flow analysis. We show the
                 practicality of our approach by implementing and
                 verifying a simplified but realistic multi-core
                 prototype of the ARM TrustZone architecture. To make
                 the security-typed HDL expressive enough to verify a
                 realistic processor, we develop new type system
                 features. Our experiments suggest that information flow
                 analysis is efficient, and programmer effort is modest.
                 We also show that information flow constraints are an
                 effective way to detect hardware vulnerabilities,
                 including several found in commercial processors.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Chisnall:2017:CJS,
  author =       "David Chisnall and Brooks Davis and Khilan Gudka and
                 David Brazdil and Alexandre Joannou and Jonathan
                 Woodruff and A. Theodore Markettos and J. Edward Maste
                 and Robert Norton and Stacey Son and Michael Roe and
                 Simon W. Moore and Peter G. Neumann and Ben Laurie and
                 Robert N. M. Watson",
  title =        "{CHERI JNI}: Sinking the {Java} Security Model into
                 the {C}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "569--583",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037725",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Java provides security and robustness by building a
                 high-level security model atop the foundation of memory
                 protection. Unfortunately, any native code linked into
                 a Java program --- including the million lines used to
                 implement the standard library --- is able to bypass
                 both the memory protection and the higher-level
                 policies. We present a hardware-assisted implementation
                 of the Java native code interface, which extends the
                 guarantees required for Java's security model to native
                 code. Our design supports safe direct access to buffers
                 owned by the JVM, including hardware-enforced read-only
                 access where appropriate. We also present Java language
                 syntax to declaratively describe isolated compartments
                 for native code. We show that it is possible to
                 preserve the memory safety and isolation requirements
                 of the Java security model in C code, allowing native
                 code to run in the same process as Java code with the
                 same impact on security as running equivalent Java
                 code. Our approach has a negligible impact on
                 performance, compared with the existing unsafe native
                 code interface. We demonstrate a prototype
                 implementation running on the CHERI microprocessor
                 synthesized in FPGA.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Ge:2017:GGC,
  author =       "Xinyang Ge and Weidong Cui and Trent Jaeger",
  title =        "{GRIFFIN}: Guarding Control Flows Using {Intel}
                 Processor Trace",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "585--598",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037716",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Researchers are actively exploring techniques to
                 enforce control-flow integrity (CFI), which restricts
                 program execution to a predefined set of targets for
                 each indirect control transfer to prevent code-reuse
                 attacks. While hardware-assisted CFI enforcement may
                 have the potential for advantages in performance and
                 flexibility over software instrumentation, current
                 hardware-assisted defenses are either incomplete (i.e.,
                 do not enforce all control transfers) or less efficient
                 in comparison. We find that the recent introduction of
                 hardware features to log complete control-flow traces,
                 such as Intel Processor Trace (PT), provides an
                 opportunity to explore how efficient and flexible a
                 hardware-assisted CFI enforcement system may become.
                 While Intel PT was designed to aid in offline debugging
                 and failure diagnosis, we explore its effectiveness for
                 online CFI enforcement over unmodified binaries by
                 designing a parallelized method for enforcing various
                 types of CFI policies. We have implemented a prototype
                 called GRIFFIN in the Linux 4.2 kernel that enables
                 complete CFI enforcement over a variety of software,
                 including the Firefox browser and its jitted code. Our
                 experiments show that GRIFFIN can enforce fine-grained
                 CFI policies with shadow stack as recommended by
                 researchers at a performance that is comparable to
                 software-only instrumentation techniques. In addition,
                 we find that alternative logging approaches yield
                 significant performance improvements for trace
                 processing, identifying opportunities for further
                 hardware assistance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Delimitrou:2017:BKW,
  author =       "Christina Delimitrou and Christos Kozyrakis",
  title =        "{Bolt}: {I} Know What You Did Last Summer \ldots{} In
                 The Cloud",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "599--613",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037703",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Cloud providers routinely schedule multiple
                 applications per physical host to increase efficiency.
                 The resulting interference on shared resources often
                 leads to performance degradation and, more importantly,
                 security vulnerabilities. Interference can leak
                 important information ranging from a service's
                 placement to confidential data, like private keys. We
                 present Bolt, a practical system that accurately
                 detects the type and characteristics of applications
                 sharing a cloud platform based on the interference an
                 adversary sees on shared resources. Bolt leverages
                 online data mining techniques that only require 2--5
                 seconds for detection. In a multi-user study on EC2,
                 Bolt correctly identifies the characteristics of 385
                 out of 436 diverse workloads. Extracting this
                 information enables a wide spectrum of
                 previously-impractical cloud attacks, including denial
                 of service attacks (DoS) that increase tail latency by
                 140x, as well as resource freeing (RFA) and
                 co-residency attacks. Finally, we show that while
                 advanced isolation mechanisms, such as cache
                 partitioning lower detection accuracy, they are
                 insufficient to eliminate these vulnerabilities
                 altogether. To do so, one must either disallow core
                 sharing, or only allow it between threads of the same
                 application, leading to significant inefficiencies and
                 performance penalties.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Kang:2017:NCI,
  author =       "Yiping Kang and Johann Hauswald and Cao Gao and Austin
                 Rovinski and Trevor Mudge and Jason Mars and Lingjia
                 Tang",
  title =        "{Neurosurgeon}: Collaborative Intelligence Between the
                 Cloud and Mobile Edge",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "615--629",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037698",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The computation for today's intelligent personal
                 assistants such as Apple Siri, Google Now, and
                 Microsoft Cortana, is performed in the cloud. This
                 cloud-only approach requires significant amounts of
                 data to be sent to the cloud over the wireless network
                 and puts significant computational pressure on the
                 datacenter. However, as the computational resources in
                 mobile devices become more powerful and energy
                 efficient, questions arise as to whether this
                 cloud-only processing is desirable moving forward, and
                 what are the implications of pushing some or all of
                 this compute to the mobile devices on the edge. In this
                 paper, we examine the status quo approach of cloud-only
                 processing and investigate computation partitioning
                 strategies that effectively leverage both the cycles in
                 the cloud and on the mobile device to achieve low
                 latency, low energy consumption, and high datacenter
                 throughput for this class of intelligent applications.
                 Our study uses 8 intelligent applications spanning
                 computer vision, speech, and natural language domains,
                 all employing state-of-the-art Deep Neural Networks
                 (DNNs) as the core machine learning technique. We find
                 that given the characteristics of DNN algorithms, a
                 fine-grained, layer-level computation partitioning
                 strategy based on the data and computation variations
                 of each layer within a DNN has significant latency and
                 energy advantages over the status quo approach. Using
                 this insight, we design Neurosurgeon, a lightweight
                 scheduler to automatically partition DNN computation
                 between mobile devices and datacenters at the
                 granularity of neural network layers. Neurosurgeon does
                 not require per-application profiling. It adapts to
                 various DNN architectures, hardware platforms, wireless
                 networks, and server load levels, intelligently
                 partitioning computation for best latency or best
                 mobile energy. We evaluate Neurosurgeon on a
                 state-of-the-art mobile development platform and show
                 that it improves end-to-end latency by 3.1X on average
                 and up to 40.7X, reduces mobile energy consumption by
                 59.5\% on average and up to 94.7\%, and improves
                 datacenter throughput by 1.5X on average and up to
                 6.7X.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Agarwal:2017:TAT,
  author =       "Neha Agarwal and Thomas F. Wenisch",
  title =        "{Thermostat}: Application-transparent Page Management
                 for Two-tiered Main Memory",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "631--644",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037706",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The advent of new memory technologies that are denser
                 and cheaper than commodity DRAM has renewed interest in
                 two-tiered main memory schemes. Infrequently accessed
                 application data can be stored in such memories to
                 achieve significant memory cost savings. Past research
                 on two-tiered main memory has assumed a 4KB page size.
                 However, 2MB huge pages are performance critical in
                 cloud applications with large memory footprints,
                 especially in virtualized cloud environments, where
                 nested paging drastically increases the cost of 4KB
                 page management. We present Thermostat, an
                 application-transparent huge-page-aware mechanism to
                 place pages in a dual-technology hybrid memory system
                 while achieving both the cost advantages of two-tiered
                 memory and performance advantages of transparent huge
                 pages. We present an online page classification
                 mechanism that accurately classifies both 4KB and 2MB
                 pages as hot or cold while incurring no observable
                 performance overhead across several representative
                 cloud applications. We implement Thermostat in Linux
                 kernel version 4.5 and evaluate its effectiveness on
                 representative cloud computing workloads running under
                 KVM virtualization. We emulate slow memory with
                 performance characteristics approximating near-future
                 high-density memory technology and show that Thermostat
                 migrates up to 50\% of application footprint to slow
                 memory while limiting performance degradation to 3\%,
                 thereby reducing memory cost up to 30\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Barbalace:2017:BBH,
  author =       "Antonio Barbalace and Robert Lyerly and Christopher
                 Jelesnianski and Anthony Carno and Ho-Ren Chuang and
                 Vincent Legout and Binoy Ravindran",
  title =        "Breaking the Boundaries in Heterogeneous-{ISA}
                 Datacenters",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "645--659",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037738",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Energy efficiency is one of the most important design
                 considerations in running modern datacenters.
                 Datacenter operating systems rely on software
                 techniques such as execution migration to achieve
                 energy efficiency across pools of machines. Execution
                 migration is possible in datacenters today because they
                 consist mainly of homogeneous-ISA machines. However,
                 recent market trends indicate that alternate ISAs such
                 as ARM and PowerPC are pushing into the datacenter,
                 meaning current execution migration techniques are no
                 longer applicable. How can execution migration be
                 applied in future heterogeneous-ISA datacenters? In
                 this work we present a compiler, runtime, and an
                 operating system extension for enabling execution
                 migration between heterogeneous-ISA servers. We present
                 a new multi-ISA binary architecture and
                 heterogeneous-OS containers for facilitating efficient
                 migration of natively-compiled applications. We build
                 and evaluate a prototype of our design and demonstrate
                 energy savings of up to 66\% for a workload running on
                 an ARM and an x86 server interconnected by a high-speed
                 network.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Lustig:2017:ASC,
  author =       "Daniel Lustig and Andrew Wright and Alexandros
                 Papakonstantinou and Olivier Giroux",
  title =        "Automated Synthesis of Comprehensive Memory Model
                 Litmus Test Suites",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "661--675",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037723",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The memory consistency model is a fundamental part of
                 any shared memory architecture or programming model.
                 Modern weak memory models are notoriously difficult to
                 define and to implement correctly. Most real-world
                 programming languages, compilers, and
                 (micro)architectures therefore rely heavily on
                 black-box testing methodologies. The success of such
                 techniques requires that the suite of litmus tests used
                 to perform the testing be comprehensive --- it should
                 ideally stress all obscure corner cases of the model
                 and of its implementation. Most litmus test suites
                 today are generated from some combination of manual
                 effort and randomization; however, the complex and
                 subtle nature of contemporary memory models means that
                 manual effort is both error-prone and subject to
                 incomplete coverage. This paper presents a methodology
                 for synthesizing comprehensive litmus test suites
                 directly from a memory model specification. By
                 construction, these suites contain all tests satisfying
                 a minimality criterion: that no synchronization
                 mechanism in the test can be weakened without causing
                 new behaviors to become observable. We formalize this
                 notion using the Alloy modeling language, and we apply
                 it to a number of existing and newly-proposed memory
                 models. Our results show not only that this synthesis
                 technique can automatically reproduce all
                 manually-generated tests from existing suites, but also
                 that it discovers new tests that are not as well
                 studied.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Liu:2017:DAD,
  author =       "Haopeng Liu and Guangpu Li and Jeffrey F. Lukman and
                 Jiaxin Li and Shan Lu and Haryadi S. Gunawi and Chen
                 Tian",
  title =        "{DCatch}: Automatically Detecting Distributed
                 Concurrency Bugs in Cloud Systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "677--691",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037735",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In big data and cloud computing era, reliability of
                 distributed systems is extremely important.
                 Unfortunately, distributed concurrency bugs, referred
                 to as DCbugs, widely exist. They hide in the large
                 state space of distributed cloud systems and manifest
                 non-deterministically depending on the timing of
                 distributed computation and communication. Effective
                 techniques to detect DCbugs are desired. This paper
                 presents a pilot solution, DCatch, in the world of
                 DCbug detection. DCatch predicts DCbugs by analyzing
                 correct execution of distributed systems. To build
                 DCatch, we design a set of happens-before rules that
                 model a wide variety of communication and concurrency
                 mechanisms in real-world distributed cloud systems. We
                 then build runtime tracing and trace analysis tools to
                 effectively identify concurrent conflicting memory
                 accesses in these systems. Finally, we design tools to
                 help prune false positives and trigger DCbugs. We have
                 evaluated DCatch on four representative open-source
                 distributed cloud systems, Cassandra, Hadoop MapReduce,
                 HBase, and ZooKeeper. By monitoring correct execution
                 of seven workloads on these systems, DCatch reports 32
                 DCbugs, with 20 of them being truly harmful.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Mashtizadeh:2017:TPD,
  author =       "Ali Jos{\'e} Mashtizadeh and Tal Garfinkel and David
                 Terei and David Mazi{\`e}res and Mendel Rosenblum",
  title =        "Towards Practical Default-On Multi-Core Record\slash
                 Replay",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "693--708",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037751",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "We present Castor, a record/replay system for
                 multi-core applications that provides consistently low
                 and predictable overheads. With Castor, developers can
                 leave record and replay on by default, making it
                 practical to record and reproduce production bugs, or
                 employ fault tolerance to recover from hardware
                 failures. Castor is inspired by several observations:
                 First, an efficient mechanism for logging
                 non-deterministic events is critical for recording
                 demanding workloads with low overhead. Through careful
                 use of hardware we were able to increase log throughput
                 by 10x or more, e.g., we could record a server handling
                 10x more requests per second for the same record
                 overhead. Second, most applications can be recorded
                 without modifying source code by using the compiler to
                 instrument language level sources of non-determinism,
                 in conjunction with more familiar techniques like
                 shared library interposition. Third, while Castor
                 cannot deterministically replay all data races, this
                 limitation is generally unimportant in practice,
                 contrary to what prior work has assumed. Castor
                 currently supports applications written in C, C++, and
                 Go on FreeBSD. We have evaluated Castor on parallel and
                 server workloads, including a commercial implementation
                 of memcached in Go, which runs Castor in production.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Huang:2017:PSA,
  author =       "Jian Huang and Michael Allen-Bond and Xuechen Zhang",
  title =        "{Pallas}: Semantic-Aware Checking for Finding Deep
                 Bugs in Fast Path",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "709--722",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037743",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Software optimization is constantly a serious concern
                 for developing high-performance systems. To accelerate
                 the workflow execution of a specific functionality,
                 software developers usually define and implement a fast
                 path to speed up the critical and commonly executed
                 functions in the workflow. However, producing a
                 bug-free fast path is nontrivial. Our study on the
                 Linux kernel discloses that a committed fast path can
                 have up to 19 follow-up patches for bug fixing, and
                 most of them are deep semantic bugs, which are
                 difficult to be pinpointed by existing bug-finding
                 tools. In this paper, we present such a new category of
                 software bugs based on our fast-path bug study across
                 various system software including virtual memory
                 manager, file systems, network, and device drivers. We
                 investigate their root causes and identify five
                 error-prone aspects in a fast path: path state, trigger
                 condition, path output, fault handling, and assistant
                 data structure. We find that many of the deep bugs can
                 be prevented by applying static analysis incorporating
                 simple semantic information. We extract a set of rules
                 based on our findings and build a toolkit PALLAS to
                 check fast-path bugs. The evaluation results show that
                 PALLAS can effectively reveal fast-path bugs in a
                 variety of systems including Linux kernel, mobile
                 operating system, software-defined networking system,
                 and web browser.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Kotra:2017:HSC,
  author =       "Jagadish B. Kotra and Narges Shahidi and Zeshan A.
                 Chishti and Mahmut T. Kandemir",
  title =        "Hardware-Software Co-design to Mitigate {DRAM} Refresh
                 Overheads: a Case for Refresh-Aware Process
                 Scheduling",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "723--736",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037724",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "DRAM cells need periodic refresh to maintain data
                 integrity. With high capacity DRAMs, DRAM refresh poses
                 a significant performance bottleneck as the number of
                 rows to be refreshed (and hence the refresh cycle time,
                 tRFC) with each refresh command increases. Modern day
                 DRAMs perform refresh at a rank-level, while LPDDRs
                 used in mobile environments support refresh at a
                 per-bank level. Rank-level refresh degrades the
                 performance significantly since none of the banks in a
                 rank can serve the on-demand requests. Per-bank refresh
                 alleviates some of the performance bottlenecks as the
                 other banks in a rank are available for on-demand
                 requests. Typical DRAM retention time is in the order
                 of several milliseconds, viz, 64msec for environments
                 operating in temperatures below 85 deg C and 32msec for
                 environments operating above 85 deg C. With systems
                 moving towards increased consolidation (ex: virtualized
                 environments), DRAM refresh becomes a significant
                 bottleneck as it reduces the available overall DRAM
                 bandwidth per task. In this work, we propose a
                 hardware-software co-design to mitigate DRAM refresh
                 overheads by exposing the hardware address mapping and
                 DRAM refresh schedule to the Operating System. We
                 propose a novel DRAM refresh-aware process scheduling
                 algorithm in OS which schedules applications on cores
                 such that none of the on-demand requests from the
                 application are stalled by refreshes. Extensive
                 evaluation of our proposed co-design on
                 multi-programmed SPEC CPU2006 workloads show
                 significant performance improvement compared to the
                 previously proposed hardware only approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Kim:2017:KPC,
  author =       "Jinchun Kim and Elvira Teran and Paul V. Gratz and
                 Daniel A. Jim{\'e}nez and Seth H. Pugsley and Chris
                 Wilkerson",
  title =        "Kill the Program Counter: Reconstructing Program
                 Behavior in the Processor Cache Hierarchy",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "737--749",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037701",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Data prefetching and cache replacement algorithms have
                 been intensively studied in the design of high
                 performance microprocessors. Typically, the data
                 prefetcher operates in the private caches and does not
                 interact with the replacement policy in the shared
                 Last-Level Cache (LLC). Similarly, most replacement
                 policies do not consider demand and prefetch requests
                 as different types of requests. In particular, program
                 counter (PC)-based replacement policies cannot learn
                 from prefetch requests since the data prefetcher does
                 not generate a PC value. PC-based policies can also be
                 negatively affected by compiler optimizations. In this
                 paper, we propose a holistic cache management technique
                 called Kill-the-PC (KPC) that overcomes the weaknesses
                 of traditional prefetching and replacement policy
                 algorithms. KPC cache management has three novel
                 contributions. First, a prefetcher which approximates
                 the future use distance of prefetch requests based on
                 its prediction confidence. Second, a simple replacement
                 policy provides similar or better performance than
                 current state-of-the-art PC-based prediction using
                 global hysteresis. Third, KPC integrates prefetching
                 and replacement policy into a whole system which is
                 greater than the sum of its parts. Information from the
                 prefetcher is used to improve the performance of the
                 replacement policy and vice-versa. Finally, KPC removes
                 the need to propagate the PC through entire on-chip
                 cache hierarchy while providing a holistic cache
                 management approach with better performance than
                 state-of-the-art PC-, and non-PC-based schemes. Our
                 evaluation shows that KPC provides 8\% better
                 performance than the best combination of existing
                 prefetcher and replacement policy for multi-core
                 workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Gao:2017:TSE,
  author =       "Mingyu Gao and Jing Pu and Xuan Yang and Mark Horowitz
                 and Christos Kozyrakis",
  title =        "{TETRIS}: Scalable and Efficient Neural Network
                 Acceleration with {$3$D} Memory",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "751--764",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037702",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The high accuracy of deep neural networks (NNs) has
                 led to the development of NN accelerators that improve
                 performance by two orders of magnitude. However,
                 scaling these accelerators for higher performance with
                 increasingly larger NNs exacerbates the cost and energy
                 overheads of their memory systems, including the
                 on-chip SRAM buffers and the off-chip DRAM channels.
                 This paper presents the hardware architecture and
                 software scheduling and partitioning techniques for
                 TETRIS, a scalable NN accelerator using 3D memory.
                 First, we show that the high throughput and low energy
                 characteristics of 3D memory allow us to rebalance the
                 NN accelerator design, using more area for processing
                 elements and less area for SRAM buffers. Second, we
                 move portions of the NN computations close to the DRAM
                 banks to decrease bandwidth pressure and increase
                 performance and energy efficiency. Third, we show that
                 despite the use of small SRAM buffers, the presence of
                 3D memory simplifies dataflow scheduling for NN
                 computations. We present an analytical scheduling
                 scheme that matches the efficiency of schedules derived
                 through exhaustive search. Finally, we develop a hybrid
                 partitioning scheme that parallelizes the NN
                 computations over multiple accelerators. Overall, we
                 show that TETRIS improves the performance by 4.1x and
                 reduces the energy by 1.5x over NN accelerators with
                 conventional, low-power DRAM memory systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Song:2017:HBA,
  author =       "Wonjun Song and Gwangsun Kim and Hyungjoon Jung and
                 Jongwook Chung and Jung Ho Ahn and Jae W. Lee and John
                 Kim",
  title =        "History-Based Arbitration for Fairness in
                 Processor-Interconnect of {NUMA} Servers",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "765--777",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037753",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "NUMA (non-uniform memory access) servers are commonly
                 used in high-performance computing and datacenters.
                 Within each server, a processor-interconnect (e.g.,
                 Intel QPI, AMD HyperTransport) is used to communicate
                 between the different sockets or nodes. In this work,
                 we explore the impact of the processor-interconnect on
                 overall performance --- in particular, the performance
                 unfairness caused by processor-interconnect
                 arbitration. It is well known that locally-fair
                 arbitration does not guarantee globally-fair bandwidth
                 sharing as closer nodes receive more bandwidth in a
                 multi-hop network. However, this work demonstrates that
                 the opposite can occur in a commodity NUMA server where
                 remote nodes receive higher bandwidth (and perform
                 better). We analyze this problem and identify that this
                 occurs because of external concentration used in router
                 micro-architectures for processor-interconnects without
                 globally-aware arbitration. While accessing remote
                 memory can occur in any NUMA system, performance
                 unfairness (or performance variation) is more critical
                 in cloud computing and virtual machines with shared
                 resources. We demonstrate how this unfairness creates
                 significant performance variation when a workload is
                 executed on the Xen virtualization platform. We then
                 provide analysis using synthetic workloads to better
                 understand the source of unfairness and eliminate the
                 impact of other shared resources, including the shared
                 last-level cache and main memory. To provide fairness,
                 we propose a novel, history-based arbitration that
                 tracks the history of arbitration grants made in the
                 previous history window. A weighted arbitration is done
                 based on the history to provide global fairness.
                 Through simulations, we show our proposed history-based
                 arbitration can provide global fairness and minimize
                 the processor-interconnect performance unfairness at
                 low cost.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Misra:2017:ELT,
  author =       "Pulkit A. Misra and Jeffrey S. Chase and Johannes
                 Gehrke and Alvin R. Lebeck",
  title =        "Enabling Lightweight Transactions with Precision
                 Time",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "1",
  pages =        "779--794",
  month =        mar,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3093337.3037722",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Mon Jun 5 18:01:58 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Distributed transactional storage is an important
                 service in today's data centers. Achieving high
                 performance without high complexity is often a
                 challenge for these systems due to sophisticated
                 consistency protocols and multiple layers of
                 abstraction. In this paper we show how to combine two
                 emerging technologies---Software-Defined Flash (SDF)
                 and precise synchronized clocks---to improve
                 performance and reduce complexity for transactional
                 storage within the data center. We present a
                 distributed transactional system (called MILANA) as a
                 layer above a durable multi-version key-value store
                 (called SEMEL) for read-heavy workloads within a data
                 center. SEMEL exploits write behavior of SSDs to
                 maintain a time-ordered sequence of versions for each
                 key efficiently and durably. MILANA adds a variant of
                 optimistic concurrency control above SEMEL's API to
                 service read requests from a consistent snapshot and to
                 enable clients to make fast local commit or abort
                 decisions for read-only transactions. Experiments with
                 the prototype reveal up to 43\% lower transaction abort
                 rates using IEEE Precision Time Protocol (PTP) vs. the
                 standard Network Time Protocol (NTP). Under the Retwis
                 benchmark, client-local validation of read-only
                 transactions yields a 35\% reduction in latency and
                 55\% increase in transaction throughput.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
  remark =       "ASPLOS'17 conference proceedings",
}
@Article{Liu:2017:ITN,
  author          = {Ming Liu and Liang Luo and Jacob Nelson and Luis Ceze
                     and Arvind Krishnamurthy and Kishore Atreya},
  title           = {{IncBricks}: Toward In-Network Computation with an
                     In-Network Cache},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {1},
  pages           = {795--809},
  month           = mar,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3093337.3037731},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Jun 5 18:01:58 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {The emergence of programmable network devices and the
                     increasing data traffic of datacenters motivate the
                     idea of in-network computation. By offloading compute
                     operations onto intermediate networking devices (e.g.,
                     switches, network accelerators, middleboxes), one can
                     (1) serve network requests on the fly with low latency;
                     (2) reduce datacenter traffic and mitigate network
                     congestion; and (3) save energy by running servers in a
                     low-power mode. However, since (1) existing switch
                     technology doesn't provide general computing
                     capabilities, and (2) commodity datacenter networks are
                     complex (e.g., hierarchical fat-tree topologies,
                     multipath communication), enabling in-network
                     computation inside a datacenter is challenging. In this
                     paper, as a step towards in-network computing, we
                     present IncBricks, an in-network caching fabric with
                     basic computing primitives. IncBricks is a
                     hardware-software co-designed system that supports
                     caching in the network using a programmable network
                     middlebox. As a key-value store accelerator, our
                     prototype lowers request latency by over 30\% and
                     doubles throughput for 1024 byte values in a common
                     cluster configuration. Our results demonstrate the
                     effectiveness of in-network computing and that
                     efficient datacenter network request processing is
                     possible if we carefully split the computation across
                     the different programmable computing elements in a
                     datacenter, including programmable switches, network
                     accelerators, and end hosts.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'17 conference proceedings},
}
@Article{Akturk:2017:AAA,
  author          = {Ismail Akturk and Ulya R. Karpuzcu},
  title           = {{AMNESIAC}: Amnesic Automatic Computer},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {1},
  pages           = {811--824},
  month           = mar,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3093337.3037741},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Jun 5 18:01:58 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Due to imbalances in technology scaling, the energy
                     consumption of data storage and communication by far
                     exceeds the energy consumption of actual data
                     production, i.e., computation. As a consequence,
                     recomputing data can become more energy efficient than
                     storing and retrieving precomputed data. At the same
                     time, recomputation can relax the pressure on the
                     memory hierarchy and the communication bandwidth. This
                     study hence assesses the energy efficiency prospects of
                     trading computation for communication. We introduce an
                     illustrative proof-of-concept design, identify
                     practical limitations, and provide design guidelines.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'17 conference proceedings},
}
@Article{Bai:2017:VRE,
  author          = {Yuxin Bai and Victor W. Lee and Engin Ipek},
  title           = {Voltage Regulator Efficiency Aware Power Management},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {1},
  pages           = {825--838},
  month           = mar,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3093337.3037717},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Mon Jun 5 18:01:58 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Conventional off-chip voltage regulators are typically
                     bulky and slow, and are inefficient at exploiting
                     system and workload variability using Dynamic Voltage
                     and Frequency Scaling (DVFS). On-die integration of
                     voltage regulators has the potential to increase the
                     energy efficiency of computer systems by enabling power
                     control at a fine granularity in both space and time.
                     The energy conversion efficiency of on-chip regulators,
                     however, is typically much lower than off-chip
                     regulators, which results in significant energy losses.
                     Fine-grained power control and high voltage regulator
                     efficiency are difficult to achieve simultaneously,
                     with either emerging on-chip or conventional off-chip
                     regulators. A voltage conversion framework that relies
                     on a hierarchy of off-chip switching regulators and
                     on-chip linear regulators is proposed to enable
                     fine-grained power control with a regulator efficiency
                     greater than 90\%. A DVFS control policy that is based
                     on a reinforcement learning (RL) approach is developed
                     to exploit the proposed framework. Per-core RL agents
                     learn and improve their control policies independently,
                     while retaining the ability to coordinate their actions
                     to accomplish system level power management objectives.
                     When evaluated on a mix of 14 parallel and 13
                     multiprogrammed workloads, the proposed voltage
                     conversion framework achieves 18\% greater energy
                     efficiency than a conventional framework that uses
                     on-chip switching regulators. Moreover, when the RL
                     based DVFS control policy is used to control the
                     proposed voltage conversion framework, the system
                     achieves a 21\% higher energy efficiency over a
                     baseline oracle policy with coarse-grained power
                     control capability.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
  remark          = {ASPLOS'17 conference proceedings},
}
@Article{Jouppi:2017:DPA,
author = "Norman P. Jouppi and Cliff Young and Nishant Patil and
David Patterson and Gaurav Agrawal and Raminder Bajwa
and Sarah Bates and Suresh Bhatia and Nan Boden and Al
Borchers and Rick Boyle and Pierre-luc Cantin and
Clifford Chao and Chris Clark and Jeremy Coriell and
Mike Daley and Matt Dau and Jeffrey Dean and Ben Gelb
and Tara Vazir Ghaemmaghami and Rajendra Gottipati and
William Gulland and Robert Hagmann and C. Richard Ho
and Doug Hogberg and John Hu and Robert Hundt and Dan
Hurt and Julian Ibarz and Aaron Jaffey and Alek
Jaworski and Alexander Kaplan and Harshit Khaitan and
Daniel Killebrew and Andy Koch and Naveen Kumar and
Steve Lacy and James Laudon and James Law and Diemthu
Le and Chris Leary and Zhuyuan Liu and Kyle Lucke and
Alan Lundin and Gordon MacKean and Adriana Maggiore and
Maire Mahony and Kieran Miller and Rahul Nagarajan and
Ravi Narayanaswami and Ray Ni and Kathy Nix and Thomas
Norrie and Mark Omernick and Narayana Penukonda and
Andy Phelps and Jonathan Ross and Matt Ross and Amir
Salek and Emad Samadiani and Chris Severn and Gregory
Sizikov and Matthew Snelham and Jed Souter and Dan
Steinberg and Andy Swing and Mercedes Tan and Gregory
Thorson and Bo Tian and Horia Toma and Erick Tuttle and
Vijay Vasudevan and Richard Walter and Walter Wang and
Eric Wilcox and Doe Hyun Yoon",
title = "In-Datacenter Performance Analysis of a Tensor
Processing Unit",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "1--12",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080246",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Many architects believe that major improvements in
cost-energy-performance must now come from
domain-specific hardware. This paper evaluates a custom
ASIC---called a Tensor Processing Unit (TPU) ---
deployed in datacenters since 2015 that accelerates the
inference phase of neural networks (NN). The heart of
the TPU is a 65,536 8-bit MAC matrix multiply unit that
offers a peak throughput of 92 TeraOps/second (TOPS)
and a large (28 MiB) software-managed on-chip memory.
The TPU's deterministic execution model is a better
match to the 99th-percentile response-time requirement
of our NN applications than are the time-varying
optimizations of CPUs and GPUs that help average
throughput more than guaranteed latency. The lack of
such features helps explain why, despite having myriad
MACs and a big memory, the TPU is relatively small and
low power. We compare the TPU to a server-class Intel
Haswell CPU and an Nvidia K80 GPU, which are
contemporaries deployed in the same datacenters. Our
workload, written in the high-level TensorFlow
framework, uses production NN applications (MLPs, CNNs,
and LSTMs) that represent 95\% of our datacenters' NN
inference demand. Despite low utilization for some
applications, the TPU is on average about 15X--30X
faster than its contemporary GPU or CPU, with TOPS/Watt
about 30X--80X higher. Moreover, using the GPU's
GDDR5 memory in the TPU would triple achieved TOPS and
raise TOPS/Watt to nearly 70X the GPU and 200X the
CPU.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Venkataramani:2017:SSC,
author = "Swagath Venkataramani and Ashish Ranjan and Subarno
Banerjee and Dipankar Das and Sasikanth Avancha and
Ashok Jagannathan and Ajaya Durg and Dheemanth Nagaraj
and Bharat Kaul and Pradeep Dubey and Anand
Raghunathan",
title = "{ScaleDeep}: a Scalable Compute Architecture for
Learning and Evaluating Deep Networks",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "13--26",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080244",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Deep Neural Networks (DNNs) have demonstrated
state-of-the-art performance on a broad range of tasks
involving natural language, speech, image, and video
processing, and are deployed in many real world
applications. However, DNNs impose significant
computational challenges owing to the complexity of the
networks and the amount of data they process, both of
which are projected to grow in the future. To improve
the efficiency of DNNs, we propose ScaleDeep, a dense,
scalable server architecture, whose processing, memory
and interconnect subsystems are specialized to leverage
the compute and communication characteristics of DNNs.
While several DNN accelerator designs have been
proposed in recent years, the key difference is that
ScaleDeep primarily targets DNN training, as opposed to
only inference or evaluation. The key architectural
features from which ScaleDeep derives its efficiency
are: (i) heterogeneous processing tiles and chips to
match the wide diversity in computational
characteristics (FLOPs and Bytes/FLOP ratio) that
manifest at different levels of granularity in DNNs,
(ii) a memory hierarchy and 3-tiered interconnect
topology that is suited to the memory access and
communication patterns in DNNs, (iii) a low-overhead
synchronization mechanism based on hardware data-flow
trackers, and (iv) methods to map DNNs to the proposed
architecture that minimize data movement and improve
core utilization through nested pipelining. We have
developed a compiler to allow any DNN topology to be
programmed onto ScaleDeep, and a detailed architectural
simulator to estimate performance and energy. The
simulator incorporates timing and power models of
ScaleDeep's components based on synthesis to Intel's
14nm technology. We evaluate an embodiment of ScaleDeep
with 7032 processing tiles that operates at 600 MHz and
has a peak performance of 680 TFLOPs (single precision)
and 1.35 PFLOPs (half-precision) at 1.4KW. Across 11
state-of-the-art DNNs containing 0.65M--14.9M neurons
and 6.8M--145.9M weights, including winners from 5 years
of the ImageNet competition, ScaleDeep demonstrates
6x--28x speedup at iso-power over the state-of-the-art
performance on GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Parashar:2017:SAC,
  author          = {Angshuman Parashar and Minsoo Rhu and Anurag Mukkara
                     and Antonio Puglielli and Rangharajan Venkatesan and
                     Brucek Khailany and Joel Emer and Stephen W. Keckler
                     and William J. Dally},
  title           = {{SCNN}: an Accelerator for Compressed-sparse
                     Convolutional Neural Networks},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {2},
  pages           = {27--40},
  month           = may,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3140659.3080254},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Sep 15 11:09:14 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Convolutional Neural Networks (CNNs) have emerged as a
                     fundamental technology for machine learning. High
                     performance and extreme energy efficiency are critical
                     for deployments of CNNs, especially in mobile platforms
                     such as autonomous vehicles, cameras, and electronic
                     personal assistants. This paper introduces the Sparse
                     CNN (SCNN) accelerator architecture, which improves
                     performance and energy efficiency by exploiting the
                     zero-valued weights that stem from network pruning
                     during training and zero-valued activations that arise
                     from the common ReLU operator. Specifically, SCNN
                     employs a novel dataflow that enables maintaining the
                     sparse weights and activations in a compressed
                     encoding, which eliminates unnecessary data transfers
                     and reduces storage requirements. Furthermore, the SCNN
                     dataflow facilitates efficient delivery of those
                     weights and activations to a multiplier array, where
                     they are extensively reused; product accumulation is
                     performed in a novel accumulator array. On contemporary
                     neural networks, SCNN can improve both performance and
                     energy by a factor of 2.7x and 2.3x, respectively, over
                     a comparably provisioned dense CNN accelerator.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Cherupalli:2017:BPA,
  author          = {Hari Cherupalli and Henry Duwe and Weidong Ye and
                     Rakesh Kumar and John Sartori},
  title           = {Bespoke Processors for Applications with Ultra-low
                     Area and Power Constraints},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {2},
  pages           = {41--54},
  month           = may,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3140659.3080247},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Sep 15 11:09:14 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {A large number of emerging applications such as
                     implantables, wearables, printed electronics, and IoT
                     have ultra-low area and power constraints. These
                     applications rely on ultra-low-power general purpose
                     microcontrollers and microprocessors, making them the
                     most abundant type of processor produced and used
                     today. While general purpose processors have several
                     advantages, such as amortized development cost across
                     many applications, they are significantly
                     over-provisioned for many area- and power-constrained
                     systems, which tend to run only one or a small number
                     of applications over their lifetime. In this paper, we
                     make a case for bespoke processor design, an automated
                     approach that tailors a general purpose processor IP to
                     a target application by removing all gates from the
                     design that can never be used by the application. Since
                     removed gates are never used by an application, bespoke
                     processors can achieve significantly lower area and
                     power than their general purpose counterparts without
                     any performance degradation. Also, gate removal can
                     expose additional timing slack that can be exploited to
                     increase area and power savings or performance of a
                     bespoke design. Bespoke processor design reduces area
                     and power by 62\% and 50\%, on average, while
                     exploiting exposed timing slack improves average power
                     savings to 65\%.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Chen:2017:PGF,
author = "Yajing Chen and Shengshuo Lu and Cheng Fu and David
Blaauw and Ronald {Dreslinski, Jr.} and Trevor Mudge
and Hun-Seok Kim",
title = "A Programmable {Galois} Field Processor for the
{Internet of Things}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "55--68",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080227",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper investigates the feasibility of a unified
processor architecture to enable error coding
flexibility and secure communication in low power
Internet of Things (IoT) wireless networks. Error
coding flexibility for wireless communication allows
IoT applications to exploit the large tradeoff space in
data rate, link distance and energy-efficiency. As a
solution, we present a light-weight Galois Field (GF)
processor to enable energy-efficient block coding and
symmetric/asymmetric cryptography kernel processing for
a wide range of GF sizes ($2^m$, $m = 2, 3, \ldots, 233$) and
arbitrary irreducible polynomials. Program directed
connections among primitive GF arithmetic units enable
dynamically configured parallelism to efficiently
perform either four-way SIMD 5- to 8-bit GF operations,
including multiplicative inverse, or a wide bit-width
(e.g., 32-bit) GF product in a single cycle. To
illustrate our ideas, we synthesized our GF processor
in a 28nm technology. Compared to a baseline software
implementation optimized for a general purpose ARM M0+
processor, our processor exhibits a 5--20$\times$ speedup for
a range of error correction codes and
symmetric/asymmetric cryptography applications.
Additionally, our proposed GF processor consumes 431 $
\mu $W at 0.9V and 100MHz, and achieves 35.5pJ/b energy
efficiency while executing AES operations at 12.2Mbps.
We achieve this within an area of 0.01mm$^2$.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2017:XCE,
author = "Aosen Wang and Lizhong Chen and Wenyao Xu",
title = "{XPro}: a Cross-End Processing Architecture for Data
Analytics in Wearables",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "69--80",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080219",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Wearable computing systems have spurred many
opportunities to continuously monitor human bodies with
sensors worn on or implanted in the body. These
emerging platforms have started to revolutionize many
fields, including healthcare and wellness applications,
particularly when integrated with intelligent analytic
capabilities. However, a significant challenge that
computer architects are facing is how to embed
sophisticated analytic capabilities in wearable
computers in an energy-efficient way while not
compromising system performance. In this paper, we
present XPro, a novel cross-end analytic engine
architecture for wearable computing systems. The
proposed cross-end architecture is able to realize a
generic classification design across wearable sensors
and a data aggregator with high energy-efficiency. To
facilitate the practical use of XPro, we also develop
an Automatic XPro Generator that formally generates
XPro instances according to specific design
constraints. As a proof of concept, we study the design
and implementation of XPro with six different health
applications. Evaluation results show that, compared
with state-of-the-art methods, XPro can increase the
battery life of the sensor node by 1.6--2.4X while at
the same time reducing system delay by 15.6--60.8\% for
wearable computing systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Weisse:2017:RLC,
author = "Ofir Weisse and Valeria Bertacco and Todd Austin",
title = "Regaining Lost Cycles with {HotCalls}: a Fast
Interface for {SGX} Secure Enclaves",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "81--93",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080208",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Intel's SGX secure execution technology allows running
computations on secret data using untrusted servers.
While recent work showed how to port applications and
large-scale computations to run under SGX, the
performance implications of using the technology
remains an open question. We present the first
comprehensive quantitative study to evaluate the
performance of SGX. We show that straightforward use of
SGX library primitives for calling functions add
between 8,200--17,000 cycles overhead, compared to
150 cycles of a typical system call. We quantify the
performance impact of these library calls and show that
in applications with high system calls frequency, such
as memcached, openVPN, and lighttpd, which all have
high bandwidth network requirements, the performance
degradation may be as high as 79\%. We investigate the
sources of this performance degradation by leveraging a
new set of microbenchmarks for SGX-specific operations
such as enclave entry-calls and out-calls, and
encrypted memory I/O accesses. We leverage the insights
we gain from these analyses to design a new SGX
interface framework HotCalls. HotCalls are based on a
synchronization spin-lock mechanism and provide a
13--27x speedup over the default interface. It can
easily be integrated into existing code, making it a
practical solution. Compared to a baseline SGX
implementation of memcached, openVPN, and lighttpd ---
we show that using the new interface boosts the
throughput by 2.6--3.7x, and reduces application latency
by 62--74\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Aga:2017:ISM,
  author          = {Shaizeen Aga and Satish Narayanasamy},
  title           = {{InvisiMem}: Smart Memory Defenses for Memory Bus Side
                     Channel},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {2},
  pages           = {94--106},
  month           = may,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3140659.3080232},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Sep 15 11:09:14 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {A practically feasible low-overhead hardware design
                     that provides strong defenses against memory bus side
                     channel remains elusive. This paper observes that smart
                     memory, memory with compute capability and a packetized
                     interface, can dramatically simplify this problem.
                     InvisiMem expands the trust base to include the logic
                     layer in the smart memory to implement cryptographic
                     primitives, which aid in addressing several memory bus
                     side channel vulnerabilities efficiently. This allows
                     the secure host processor to send encrypted addresses
                     over the untrusted memory bus, and thereby eliminates
                     the need for expensive address obfuscation techniques
                     based on Oblivious RAM (ORAM). In addition, smart
                     memory enables efficient solutions for ensuring
                     freshness without using expensive Merkle trees, and
                     mitigates memory bus timing channel using constant
                     heart-beat packets. We demonstrate that InvisiMem
                     designs have one to two orders of magnitude of lower
                     overheads for performance, space, energy, and memory
                     bandwidth, compared to prior solutions.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Awad:2017:OLO,
author = "Amro Awad and Yipeng Wang and Deborah Shands and Yan
Solihin",
title = "{ObfusMem}: a Low-Overhead Access Obfuscation for
Trusted Memories",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "107--119",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080230",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Trustworthy software requires strong privacy and
security guarantees from a secure trust base in
hardware. While chipmakers provide hardware support for
basic security and privacy primitives such as enclaves
and memory encryption, these primitives do not address
hiding of the memory access pattern, information about
which may enable attacks on the system or reveal
characteristics of sensitive user data.
State-of-the-art approaches to protecting the access
pattern are largely based on Oblivious RAM (ORAM).
Unfortunately, current ORAM implementations suffer from
very significant practicality and overhead concerns,
including roughly an order of magnitude slowdown, more
than 100\% memory capacity overheads, and the potential
for system deadlock. Memory technology trends are
moving towards 3D and 2.5D integration, enabling
significant logic capabilities and sophisticated memory
interfaces. Leveraging the trends, we propose a new
approach to access pattern obfuscation, called
ObfusMem. ObfusMem adds the memory to the trusted
computing base and incorporates cryptographic engines
within the memory. ObfusMem encrypts commands and
addresses on the memory bus, hence the access pattern
is cryptographically obfuscated from external
observers. Our evaluation shows that ObfusMem incurs an
overhead of 10.9\% on average, which is about an order
of magnitude faster than ORAM implementations.
Furthermore, ObfusMem does not incur capacity overheads
and does not amplify writes. We analyze and compare the
security protections provided by ObfusMem and ORAM, and
highlight their differences.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Khatamifard:2017:TTA,
  author          = {S. Karen Khatamifard and Longfei Wang and Weize Yu and
                     Sel{\c{c}}uk K{\"o}se and Ulya R. Karpuzcu},
  title           = {{ThermoGater}: Thermally-Aware On-Chip Voltage
                     Regulation},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {2},
  pages           = {120--132},
  month           = may,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3140659.3080250},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Sep 15 11:09:14 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Tailoring the operating voltage to fine-grain temporal
                     changes in the power and performance needs of the
                     workload can effectively enhance power efficiency.
                     Therefore, power-limited computing platforms of today
                     widely deploy integrated (i.e., on-chip) voltage
                     regulation which enables fast fine-grain voltage
                     control. Voltage regulators convert and distribute
                     power from an external energy source to the processor.
                     Unfortunately, power conversion loss is inevitable and
                     projected integrated regulator designs are unlikely to
                     eliminate this loss even asymptotically. Reconfigurable
                     power delivery by selective shut-down, i.e., gating, of
                     distributed on-chip regulators in response to
                     spatio-temporal changes in power demand can sustain
                     operation at the minimum conversion loss. However, even
                     the minimum conversion loss is sizable, and as
                     conversion loss gets dissipated as heat, on-chip
                     regulators can easily cause thermal emergencies due to
                     their small footprint. Although reconfigurable
                     distributed on-chip power delivery is emerging as a new
                     design paradigm to enforce sustained operation at
                     minimum possible power conversion loss, thermal
                     implications have been overlooked at the architectural
                     level. This paper hence provides a thermal
                     characterization. We introduce ThermoGater, an
                     architectural governor for a collection of practical,
                     thermally-aware regulator gating policies to mitigate
                     (if not prevent) regulator-induced thermal emergencies,
                     which also consider potential implications for voltage
                     noise. Practical ThermoGater policies can not only
                     sustain minimum power conversion loss throughout
                     execution effectively, but also keep the maximum
                     temperature (thermal gradient) across chip within
                     0.6${}^\circ $C (0.3${}^\circ $C) on average in
                     comparison to thermally-optimal oracular regulator
                     gating, while the maximum voltage noise stays within
                     1.0\% of the best case voltage noise profile.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Yang:2017:PIP,
  author          = {Hailong Yang and Quan Chen and Moeiz Riaz and Zhongzhi
                     Luan and Lingjia Tang and Jason Mars},
  title           = {{PowerChief}: Intelligent Power Allocation for
                     Multi-Stage Applications to Improve Responsiveness on
                     Power Constrained {CMP}},
  journal         = j-COMP-ARCH-NEWS,
  volume          = {45},
  number          = {2},
  pages           = {133--146},
  month           = may,
  year            = {2017},
  CODEN           = {CANED2},
  DOI             = {https://doi.org/10.1145/3140659.3080224},
  ISSN            = {0163-5964 (print), 1943-5851 (electronic)},
  ISSN-L          = {0163-5964},
  bibdate         = {Fri Sep 15 11:09:14 MDT 2017},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract        = {Modern user facing applications consist of multiple
                     processing stages with a number of service instances in
                     each stage. The latency profile of these multi-stage
                     applications is intrinsically variable, making it
                     challenging to provide satisfactory responsiveness.
                     Given a limited power budget, improving the end-to-end
                     latency requires intelligently boosting the bottleneck
                     service across stages using multiple boosting
                     techniques. However, prior work fail to acknowledge the
                     multi-stage nature of user-facing applications and
                     perform poorly in improving responsiveness on power
                     constrained CMP, as they are unable to accurately
                     identify bottleneck service and apply the boosting
                     techniques adaptively. In this paper, we present
                     PowerChief, a runtime framework that (1) provides joint
                     design of service and query to monitor the latency
                     statistics across service stages and accurately
                     identifies the bottleneck service during runtime; (2)
                     adaptively chooses the boosting technique to accelerate
                     the bottleneck service with improved responsiveness;
                     (3) dynamically reallocates the constrained power
                     budget across service stages to accommodate the chosen
                     boosting technique. Evaluated with real world
                     multi-stage applications, PowerChief improves the
                     average latency by 20.3x and 32.4x (99\% tail latency
                     by 13.3x and 19.4x) for Sirius and Natural Language
                     Processing applications respectively compared to
                     stage-agnostic power allocation. In addition, for the
                     given QoS target, PowerChief reduces the power
                     consumption of Sirius and Web Search applications by
                     23\% and 33\% respectively over prior work.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGARCH Computer Architecture News},
  journal-URL     = {https://dl.acm.org/loi/sigarch},
}
@Article{Ravi:2017:CCH,
  author =       "Gokul Subramanian Ravi and Mikko H. Lipasti",
  title =        "{CHARSTAR}: Clock Hierarchy Aware Resource Scaling in
                 Tiled {ARchitectures}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "147--160",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080212",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "High-performance architectures are over-provisioned
                 with resources to extract the maximum achievable
                 performance out of applications. Two sources of
                 avoidable power dissipation are the leakage power from
                 underutilized resources, along with clock power from
                 the clock hierarchy that feeds these resources. Most
                 reconfiguration mechanisms either focus solely on power
                 gating execution resources alone or in addition, simply
                 turn off the immediate clock tree segment which
                 supplied the clock to those resources. These proposals
                 neither attempt to gate further up the clock hierarchy
                 nor do they involve the clock hierarchy in influencing
                 the reconfiguration decisions. The primary contribution
                 of CHARSTAR is optimizing reconfiguration mechanisms to
                 become clock hierarchy aware. Resource gating decisions
                 are cognizant of the power consumed by each node in the
                 clock hierarchy and additionally, entire branches of
                 the clock tree are greedily shut down whenever
                 possible. The CHARSTAR design is further optimized for
                 balanced spatio-temporal reconfiguration and also
                 enables efficient joint control of resource and
                 frequency scaling. The proposal is implemented by
                 leveraging the inherent advantages of spatial
                 architectures, utilizing a control mechanism driven by
                 a lightweight offline trained neural predictor.
                 CHARSTAR, when deployed on the CRIB tiled
                 microarchitecture, improves processor energy efficiency
                 by 20--25\%, with efficiency improvements of roughly 2x
                 in comparison to a naive power gating mechanism.
                 Alternatively, it improves performance by 10--20\% under
                 varying power and energy constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Sinclair:2017:CRS,
  author =       "Matthew D. Sinclair and Johnathan Alsop and Sarita V.
                 Adve",
  title =        "Chasing Away {RAts}: Semantics and Evaluation for
                 Relaxed Atomics on Heterogeneous Systems",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "161--174",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080206",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "An unambiguous and easy-to-understand memory
                 consistency model is crucial for ensuring correct
                 synchronization and guiding future design of
                 heterogeneous systems. In a widely adopted approach,
                 the memory model guarantees sequential consistency (SC)
                 as long as programmers obey certain rules. The popular
                 data-race-free-0 (DRF0) model exemplifies this
                 SC-centric approach by requiring programmers to avoid
                 data races. Recent industry models, however, have
                 extended such SC-centric models to incorporate relaxed
                 atomics. These extensions can improve performance, but
                 are difficult to specify formally and use correctly.
                 This work addresses the impact of relaxed atomics on
                 consistency models for heterogeneous systems in two
                 ways. First, we introduce a new model,
                 Data-Race-Free-Relaxed (DRFrlx), that extends DRF0 to
                 provide SC-centric semantics for the common use cases
                 of relaxed atomics. Second, we evaluate the performance
                 of relaxed atomics in CPU-GPU systems for these use
                 cases. We find mixed results --- for most cases,
                 relaxed atomics provide only a small benefit in
                 execution time, but for some cases, they help
                 significantly (e.g., up to 51\% for DRFrlx over
                 DRF0).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Shin:2017:HLL,
  author =       "Seunghee Shin and James Tuck and Yan Solihin",
  title =        "Hiding the Long Latency of Persist Barriers Using
                 Speculative Execution",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "175--186",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080240",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Byte-addressable non-volatile memory technology is
                 emerging as an alternative for DRAM for main memory.
                 This new Non-Volatile Main Memory (NVMM) allows
                 programmers to store important data in data structures
                 in memory instead of serializing it to the file system,
                 thereby providing a substantial performance boost.
                 However, modern systems reorder memory operations and
                 utilize volatile caches for better performance, making
                 it difficult to ensure a consistent state in NVMM.
                 Intel recently announced a new set of persistence
                 instructions, clflushopt, clwb, and pcommit. These new
                 instructions make it possible to implement fail-safe
                 code on NVMM, but few workloads have been written or
                 characterized using these new instructions. In this
                 work, we describe how these instructions work and how
                 they can be used to implement write-ahead logging based
                 transactions. We implement several common data
                 structures and kernels and evaluate the performance
                 overhead incurred over traditional non-persistent
                 implementations. In particular, we find that
                 persistence instructions occur in clusters along with
                 expensive fence operations, they have long latency, and
                 they add a significant execution time overhead, on
                 average by 20.3\% over code with logging but without
                 fence instructions to order persists. To deal with this
                 overhead and alleviate the performance bottleneck, we
                 propose to speculate past long latency persistency
                 operations using checkpoint-based processing. Our
                 speculative persistence architecture reduces the
                 execution time overheads to only 3.6\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Ros:2017:NSL,
  author =       "Alberto Ros and Trevor E. Carlson and Mehdi Alipour
                 and Stefanos Kaxiras",
  title =        "Non-Speculative Load-Load Reordering in {TSO}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "187--200",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080220",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "In Total Store Order memory consistency (TSO), loads
                 can be speculatively reordered to improve performance.
                 If a load-load reordering is seen by other cores,
                 speculative loads must be squashed and re-executed. In
                 architectures with an unordered interconnection network
                 and directory coherence, this has been the established
                 view for decades. We show, for the first time, that it
                 is not necessary to squash and re-execute speculatively
                 reordered loads in TSO when their reordering is seen.
                 Instead, the reordering can be hidden from other cores
                 by the coherence protocol. The implication is that we
                 can irrevocably bind speculative loads. This allows us
                 to commit reordered loads out-of-order without having
                 to wait (for the loads to become non-speculative) or
                 without having to checkpoint committed state (and
                 rollback if needed), just to ensure correctness in the
                 rare case of some core seeing the reordering. We show
                 that by exposing a reordering to the coherence layer
                 and by appropriately modifying a typical directory
                 protocol we can successfully hide load-load reordering
                 without perceptible performance cost and without
                 deadlock. Our solution is cost-effective and increases
                 the performance of out-of-order commit by a sizable
                 margin, compared to the base case where memory
                 operations are not allowed to commit if the consistency
                 model could be violated.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Lee:2017:MVN,
  author =       "Doowon Lee and Valeria Bertacco",
  title =        "{MTraceCheck}: Validating Non-Deterministic Behavior
                 of Memory Consistency Models in Post-Silicon
                 Validation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "201--213",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080235",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This work presents a minimally-intrusive,
                 high-performance, post-silicon validation framework for
                 validating memory consistency in multi-core systems.
                 Our framework generates constrained-random tests that
                 are instrumented with observability-enhancing code for
                 memory consistency verification. For each test, we
                 generate a set of compact signatures reflecting the
                 memory-ordering patterns observed over many executions
                 of the test, with each of the signatures corresponding
                 to a unique memory-ordering pattern. We then leverage
                 an efficient and novel analysis to quickly determine if
                 the observed execution patterns represented by each
                 unique signature abide by the memory consistency model.
                 Our analysis derives its efficiency by exploiting the
                 structural similarities among the patterns observed. We
                 evaluated our framework, MTraceCheck, on two platforms:
                 an x86-based desktop and an ARM-based SoC platform,
                 both running multi-threaded test programs in a
                 bare-metal environment. We show that MTraceCheck
                 reduces the perturbation introduced by the
                 memory-ordering monitoring activity by 93\% on average,
                 compared to a baseline register flushing approach that
                 saves the register's state after each load operation.
                 We also reduce the computation requirements of our
                 consistency checking analysis by 81\% on average,
                 compared to a conventional topological sorting
                 solution. We finally demonstrate the effectiveness of
                 MTraceCheck on buggy designs, by evaluating multiple
                 case studies where it successfully exposes subtle bugs
                 in a full-system simulation environment.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Zheng:2017:RMA,
  author =       "Ruohuang Zheng and Michael C. Huang",
  title =        "Redundant Memory Array Architecture for Efficient
                 Selective Protection",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "214--227",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080213",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Memory hardware errors may result from transient
                 particle-induced faults as well as device defects due
                 to aging. These errors are an important threat to
                 computer system reliability as VLSI technologies
                 continue to scale. Managing memory hardware errors is a
                 critical component in developing an overall system
                 dependability strategy. Memory error detection and
                 correction are supported in a range of available
                 hardware mechanisms. However, memory protections
                 (particularly the more advanced ones) come at
                 substantial costs in performance and energy usage.
                 Moreover, the protection mechanisms are often a fixed,
                 system-wide choice and can not easily adapt to
                 different protection demand of different applications
                 or memory regions. In this paper, we present a new RAIM
                 (redundant array of independent memory) design that
                 compared to the state-of-the-art implementation can
                 easily provide high protection capability and the
                 ability to selectively protect a subset of the memory.
                 A straightforward implementation of the design can
                 incur a substantial memory traffic overhead. We propose
                 a few practical optimizations to mitigate this
                 overhead. With these optimizations the proposed RAIM
                 design offers significant advantages over existing RAIM
                 design at lower or comparable costs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Hicks:2017:CAS,
  author =       "Matthew Hicks",
  title =        "{Clank}: Architectural Support for Intermittent
                 Computation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "228--240",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080238",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "The processors that drive embedded systems are getting
                 smaller; meanwhile, the batteries used to provide power
                 to those systems have stagnated. If we are to realize
                 the dream of ubiquitous computing promised by the
                 Internet of Things, processors must shed large, heavy,
                 expensive, and high maintenance batteries and, instead,
                 harvest energy from their environment. One challenge
                 with this transition is that harvested energy is
                 insufficient for continuous operation. Unfortunately,
                 existing programs fail miserably when executed
                 intermittently. This paper presents Clank: lightweight
                 architectural support for correct and efficient
                 execution of long-running applications on harvested
                 energy---without programmer intervention or extreme
                 hardware modifications. Clank is a set of hardware
                 buffers and memory-access monitors that dynamically
                 maintain idempotency. Essentially, Clank dynamically
                 decomposes program execution into a stream of
                 restartable sub-executions connected via lightweight
                 checkpoints. To validate Clank's ability to correctly
                 stretch program execution across frequent, random power
                 cycles, and to explore the associated hardware and
                 software overheads, we implement Clank in Verilog,
                 formally verify it, and then add it to an ARM Cortex
                 M0+ processor which we use to run a set of 23 embedded
                 systems benchmarks. Experiments show run-time overheads
                 as low as 2.5\%, with run-time overheads of 6\% for a
                 version of Clank that adds 1.7\% hardware. Clank
                 minimizes checkpoints so much that re-execution time
                 becomes the dominant contributor to run-time
                 overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Kaliorakis:2017:MED,
  author =       "Manolis Kaliorakis and Dimitris Gizopoulos and Ramon
                 Canal and Antonio Gonzalez",
  title =        "{MeRLiN}: Exploiting Dynamic Instruction Behavior for
                 Fast and Accurate Microarchitecture Level Reliability
                 Assessment",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "241--254",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080225",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Early reliability assessment of hardware structures
                 using microarchitecture level simulators can
                 effectively guide major error protection decisions in
                 microprocessor design. Statistical fault injection on
                 microarchitectural structures modeled in performance
                 simulators is an accurate method to measure their
                 Architectural Vulnerability Factor (AVF) but requires
                 excessively long campaigns to obtain high statistical
                 significance. We propose MeRLiN, a methodology to
                 boost microarchitecture level injection-based
                 reliability assessment by several orders of magnitude
                 and keep the accuracy of the assessment unaffected even
                 for large injection campaigns with very high
                 statistical significance. The core of MeRLiN is the
                 grouping of faults of an initial list in equivalent
                 classes. All faults in the same group target equivalent
                 vulnerable intervals of program execution ending up to
                 the same static instruction that reads the faulty
                 entries. Faults in the same group occur in different
                 times and entries of a structure and it is extremely
                 likely that they all have the same effect in program
                 execution; thus, fault injection is performed only on a
                 few representatives from each group. We evaluate MeRLiN
                 for different sizes of the physical register file, the
                 store queue and the first level data cache of a
                 contemporary microarchitecture running MiBench and SPEC
                 CPU2006 benchmarks. For all our experiments, MeRLiN is
                 from 2 to 3 orders of magnitude faster than an
                 extremely high statistical significant injection
                 campaign, reporting the same reliability measurements
                 with negligible loss of accuracy. Finally, we
                 theoretically analyze MeRLiN's statistical behavior to
                 further justify its accuracy.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Patel:2017:RPR,
  author =       "Minesh Patel and Jeremie S. Kim and Onur Mutlu",
  title =        "The Reach Profiler {(REAPER)}: Enabling the Mitigation
                 of {DRAM} Retention Failures via Profiling at
                 Aggressive Conditions",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "255--268",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080242",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Modern DRAM-based systems suffer from significant
                 energy and latency penalties due to conservative DRAM
                 refresh standards. Volatile DRAM cells can retain
                 information across a wide distribution of times ranging
                 from milliseconds to many minutes, but each cell is
                 currently refreshed every 64ms to account for the
                 extreme tail end of the retention time distribution,
                 leading to a high refresh overhead. Due to poor DRAM
                 technology scaling, this problem is expected to get
                 worse in future device generations. Hence, the current
                 approach of refreshing all cells with the worst-case
                 refresh rate must be replaced with a more intelligent
                 design. Many prior works propose reducing the refresh
                 overhead by extending the default refresh interval to a
                 higher value, which we refer to as the target refresh
                 interval, across parts or all of a DRAM chip. These
                 proposals handle the small set of failing cells that
                 cannot retain data throughout the entire extended
                 refresh interval via retention failure mitigation
                 mechanisms (e.g., error correcting codes or bit-repair
                 mechanisms). This set of failing cells is discovered
                 via retention failure profiling, which is currently a
                 brute-force process that writes a set of known data to
                 DRAM, disables refresh and waits for the duration of
                 the target refresh interval, and then checks for
                 retention failures across the DRAM chip. We show that
                 this brute-force approach is too slow and is
                 detrimental to system execution, especially with
                 frequent online profiling. This paper presents reach
                 profiling, a new methodology for retention failure
                 profiling based on the key observation that an
                 overwhelming majority of failing DRAM cells at a target
                 refresh interval fail more reliably at both longer
                 refresh intervals and higher temperatures. Using 368
                 state-of-the-art LPDDR4 DRAM chips from three major
                 vendors, we conduct a thorough experimental
                 characterization of the complex set of tradeoffs
                 inherent in the profiling process. We identify three
                 key metrics to guide design choices for retention
                 failure profiling and mitigation mechanisms: coverage,
                 false positive rate, and runtime. We propose reach
                 profiling, a new retention failure profiling mechanism
                 whose key idea is to profile failing cells at a longer
                 refresh interval and/or higher temperature relative to
                 the target conditions in order to maximize failure
                 coverage while minimizing the false positive rate and
                 profiling runtime. We thoroughly explore the tradeoffs
                 associated with reach profiling and show that there is
                 significant room for improvement in DRAM retention
                 failure profiling beyond the brute-force approach. We
                 show with experimental data that on average, by
                 profiling at 250ms above the target refresh interval,
                 our first implementation of reach profiling (called
                 REAPER) can attain greater than 99\% coverage of
                 failing DRAM cells with less than a 50\% false positive
                 rate while running 2.5x faster than the brute-force
                 approach. In addition, our end-to-end evaluations show
                 that REAPER enables significant system performance
                 improvement and DRAM power reduction, outperforming the
                 brute-force approach and enabling high-performance
                 operation at longer refresh intervals that were
                 previously unreasonable to employ due to the high
                 associated profiling overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2017:QSS,
  author =       "Zhenning Wang and Jun Yang and Rami Melhem and Bruce
                 Childers and Youtao Zhang and Minyi Guo",
  title =        "Quality of Service Support for Fine-Grained Sharing on
                 {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "269--281",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080203",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "GPUs have been widely adopted in data centers to
                 provide acceleration services to many applications.
                 Sharing a GPU is increasingly important for better
                 processing throughput and energy efficiency. However,
                 quality of service (QoS) among concurrent applications
                 is minimally supported. Previous efforts are too
                 coarse-grained and not scalable with increasing QoS
                 requirements. We propose QoS mechanisms for a
                 fine-grained form of GPU sharing. Our QoS support can
                 provide control over the progress of kernels on a per
                 cycle basis and the amount of thread-level parallelism
                 of each kernel. Due to accurate resource management,
                 our QoS support has significantly better scalability
                 compared with previous best efforts. Evaluations show
                 that, when the GPU is shared by three kernels, two of
                 which have QoS goals, the proposed techniques achieve
                 QoS goals 43.8\% more often than previous techniques
                 and have 20.5\% higher throughput.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Chen:2017:AGH,
  author =       "Sui Chen and Lu Peng and Samuel Irving",
  title =        "Accelerating {GPU} Hardware Transactional Memory with
                 Snapshot Isolation",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "282--294",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080204",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Snapshot Isolation (SI) is an established model in the
                 database community, which permits write-read conflicts
                 to pass and aborts transactions only on write-write
                 conflicts. With the Write Skew anomaly correctly
                 eliminated, SI can reduce the occurrence of aborts,
                 save the work done by transactions, and greatly benefit
                 long transactions involving complex data structures.
                 GPUs are evolving towards a general-purpose computing
                 device with growing support for irregular workloads,
                 including transactional memory. The usage of snapshot
                 isolation on transactional memory has proven to be
                 greatly beneficial for performance. In this paper, we
                 propose a multi-versioned memory subsystem for
                 hardware-based transactional memory on the GPU, with a
                 method for eliminating the Write Skew anomaly on the
                 fly, and finally incorporate Snapshot Isolation with
                 this system. The results show that snapshot isolation
                 can effectively boost the performance of dynamically
                 sized data structures such as linked lists, binary
                 trees and red-black trees, sometimes by as much as
                 4.5x, which results in improved overall performance of
                 benchmarks utilizing these data structures.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Wang:2017:DAC,
  author =       "Kai Wang and Calvin Lin",
  title =        "Decoupled Affine Computation for {SIMT GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "295--306",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080205",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This paper introduces a method of decoupling affine
                 computations---a class of expressions that produces
                 extremely regular values across SIMT threads---from the
                 main execution stream, so that the affine computations
                 can be performed with greater efficiency and with
                 greater independence from the main execution stream.
                 This decoupling has two benefits: (1) For compute-bound
                 programs, it significantly reduces the dynamic warp
                 instruction count; (2) for memory-bound workloads, it
                 significantly reduces memory latency, since it acts as
                 a non-speculative prefetcher for the data specified by
                 the many memory address calculations that are affine
                 computations. We evaluate our solution, known as
                 Decoupled Affine Computation (DAC), using GPGPU-sim and
                 a set of 29 GPGPU programs. We find that on average,
                 DAC improves performance by 40\% and reduces energy
                 consumption by 20\%. For the 11 compute-bound
                 benchmarks, DAC improves performance by 34\%, compared
                 with 11\% for the previous state-of-the-art. For the 18
                 memory-bound programs, DAC improves performance by an
                 average of 44\%, compared with 16\% for
                 state-of-the-art GPU prefetcher.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Koo:2017:APA,
  author =       "Gunjae Koo and Yunho Oh and Won Woo Ro and Murali
                 Annavaram",
  title =        "Access Pattern-Aware Cache Management for Improving
                 Data Utilization in {GPU}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "307--319",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080239",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Long latency of memory operation is a prominent
                 performance bottleneck in graphics processing units
                 (GPUs). The small data cache that must be shared across
                 dozens of warps (a collection of threads) creates
                 significant cache contention and premature data
                 eviction. Prior works have recognized this problem and
                 proposed warp throttling which reduces the number of
                 active warps contending for cache space. In this paper
                 we discover that individual load instructions in a warp
                 exhibit four different types of data locality behavior:
                 (1) data brought by a warp load instruction is used
                 only once, which is classified as streaming data (2)
                 data brought by a warp load is reused multiple times
                 within the same warp, called intra-warp locality (3)
                 data brought by a warp is reused multiple times but
                 across different warps, called inter-warp locality (4)
                 and some data exhibit both a mix of intra- and
                 inter-warp locality. Furthermore, each load instruction
                 exhibits consistently the same locality type across all
                 warps within a GPU kernel. Based on this discovery we
                 argue that cache management must be done using per-load
                 locality type information, rather than applying
                 warp-wide cache management policies. We propose Access
                 Pattern-aware Cache Management (APCM), which
                 dynamically detects the locality type of each load
                 instruction by monitoring the accesses from one
                 exemplary warp. APCM then uses the detected locality
                 type to selectively apply cache bypassing and cache
                 pinning of data based on load locality
                 characterization. Using an extensive set of simulations
                 we show that APCM improves performance of GPUs by 34\%
                 for cache sensitive applications while saving 27\% of
                 energy consumption over baseline GPU.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Arunkumar:2017:MGM,
  author =       "Akhil Arunkumar and Evgeny Bolotin and Benjamin Cho
                 and Ugljesa Milic and Eiman Ebrahimi and Oreste Villa
                 and Aamer Jaleel and Carole-Jean Wu and David Nellans",
  title =        "{MCM-GPU}: Multi-Chip-Module {GPUs} for Continued
                 Performance Scalability",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "45",
  number =       "2",
  pages =        "320--332",
  month =        may,
  year =         "2017",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3140659.3080231",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Sep 15 11:09:14 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Historically, improvements in GPU-based high
                 performance computing have been tightly coupled to
                 transistor scaling. As Moore's law slows down, and the
                 number of transistors per die no longer grows at
                 historical rates, the performance curve of single
                 monolithic GPUs will ultimately plateau. However, the
                 need for higher performing GPUs continues to exist in
                 many domains. To address this need, in this paper we
                 demonstrate that package-level integration of multiple
                 GPU modules to build larger logical GPUs can enable
                 continuous performance scaling beyond Moore's law.
                 Specifically, we propose partitioning GPUs into easily
                 manufacturable basic GPU Modules (GPMs), and
                 integrating them on package using high bandwidth and
                 power efficient signaling technologies. We lay out the
                 details and evaluate the feasibility of a basic
                 Multi-Chip-Module GPU (MCM-GPU) design. We then propose
                 three architectural optimizations that significantly
                 improve GPM data locality and minimize the sensitivity
                 on inter-GPM bandwidth. Our evaluation shows that the
                 optimized MCM-GPU achieves 22.8\% speedup and 5x
                 inter-GPM bandwidth reduction when compared to the
                 basic MCM-GPU architecture. Most importantly, the
                 optimized MCM-GPU design is 45.5\% faster than the
                 largest implementable monolithic GPU, and performs
                 within 10\% of a hypothetical (and unbuildable)
                 monolithic GPU. Lastly we show that our optimized
                 MCM-GPU is 26.8\% faster than an equally equipped
                 Multi-GPU system with the same total number of SMs and
                 DRAM bandwidth.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "https://dl.acm.org/loi/sigarch",
}
@Article{Nazari:2017:EEB,
author = "Alireza Nazari and Nader Sehatbakhsh and Monjur Alam
and Alenka Zajic and Milos Prvulovic",
title = "{EDDIE}: {EM}-Based Detection of Deviations in Program
Execution",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "333--346",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080223",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper describes EM-Based Detection of Deviations
in Program Execution (EDDIE), a new method for
detecting anomalies in program execution, such as
malware and other code injections, without introducing
any overheads, adding any hardware support, changing
any software, or using any resources on the monitored
system itself. Monitoring with EDDIE involves receiving
electromagnetic (EM) emanations that are emitted as a
side effect of execution on the monitored system, and
it relies on spikes in the EM spectrum that are
produced as a result of periodic (e.g. loop) activity
in the monitored execution. During training, EDDIE
characterizes normal execution behavior in terms of
peaks in the EM spectrum that are observed at various
points in the program execution, but it does not need
any characterization of the malware or other code that
might later be injected. During monitoring, EDDIE
identifies peaks in the observed EM spectrum, and
compares these peaks to those learned during training.
Since EDDIE requires no resources on the monitored
machine and no changes to the monitored software, it is
especially well suited for security monitoring of
embedded and IoT devices. We evaluate EDDIE on a real
IoT system and in a cycle-accurate simulator, and find
that even relatively brief injected bursts of activity
(a few milliseconds) are detected by EDDIE with high
accuracy, and that it also accurately detects when even
a few instructions are injected into an existing loop
within the application.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yan:2017:SHA,
author = "Mengjia Yan and Bhargava Gopireddy and Thomas Shull
and Josep Torrellas",
title = "Secure Hierarchy-Aware Cache Replacement Policy
{(SHARP)}: Defending Against Cache-Based Side Channel
Attacks",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "347--360",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080222",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "In cache-based side channel attacks, a spy that shares
a cache with a victim probes cache locations to extract
information on the victim's access patterns. For
example, in evict+reload, the spy repeatedly evicts and
then reloads a probe address, checking if the victim
has accessed the address in between the two operations.
While there are many proposals to combat these cache
attacks, they all have limitations: they either hurt
performance, require programmer intervention, or can
only defend against some types of attacks. This paper
makes the following observation for an environment with
an inclusive cache hierarchy: when the spy evicts the
probe address from the shared cache, the address will
also be evicted from the private cache of the victim
process, creating an inclusion victim. Consequently, to
disable cache attacks, this paper proposes to alter the
line replacement algorithm of the shared cache, to
prevent a process from creating inclusion victims in
the caches of cores running other processes. By
enforcing this rule, the spy cannot evict the probe
address from the shared cache and, hence, cannot
glimpse any information on the victim's access
patterns. We call our proposal SHARP (Secure
Hierarchy-Aware cache Replacement Policy). SHARP
efficiently defends against all existing cross-core
shared-cache attacks, needs only minimal hardware
modifications, and requires no code modifications. We
implement SHARP in a cycle-level full-system simulator.
We show that it protects against real-world attacks,
and that it introduces negligible average performance
degradation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Deng:2017:LLH,
author = "Zhaoxia Deng and Ariel Feldman and Stuart A. Kurtz and
Frederic T. Chong",
title = "Lemonade from Lemons: Harnessing Device Wearout to
Create Limited-Use Security Architectures",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "361--374",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080226",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Most architectures are designed to mitigate the
usually undesirable phenomenon of device wearout. We
take a contrarian view and harness this phenomenon to
create hardware security mechanisms that resist attacks
by statistically enforcing an upper bound on hardware
uses, and consequently attacks. For example, let us
assume that a user may log into a smartphone a maximum
of 50 times a day for 5 years, resulting in
approximately 91,250 legitimate uses. If we assume at
least 8-character passwords and we require login (and
retrieval of the storage decryption key) to traverse
hardware that wears out in 91,250 uses, then an
adversary has a negligible chance of successful
brute-force attack before the hardware wears out, even
assuming real-world password cracking by professionals.
M-way replication of our hardware and periodic
re-encryption of storage can increase the daily usage
bound by a factor of M. The key challenge is to achieve
practical statistical bounds on both minimum and
maximum uses for an architecture, given that individual
devices can vary widely in wearout characteristics. We
introduce techniques for architecturally controlling
these bounds and perform a design space exploration for
three use cases: a limited-use connection, a
limited-use targeting system and one-time pads. These
techniques include decision trees, parallel structures,
Shamir's secret-sharing mechanism, Reed--Solomon codes,
and module replication. We explore the cost in area,
energy and latency of using these techniques to achieve
system-level usage targets given device-level wearout
distributions. With redundant encoding, for example, we
can improve exponential sensitivity to device lifetime
variation to linear sensitivity, reducing the total
number of NEMS devices by 4 orders of magnitude to
about 0.8 million for limited-use connections (compared
with 4 billion if without redundant encoding).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Altaf:2017:LHL,
author = "Muhammad Shoaib Bin Altaf and David A. Wood",
title = "{LogCA}: a High-Level Performance Model for Hardware
Accelerators",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "375--388",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080216",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With the end of Dennard scaling, architects have
increasingly turned to special-purpose hardware
accelerators to improve the performance and energy
efficiency for some applications. Unfortunately,
accelerators don't always live up to their expectations
and may under-perform in some situations. Understanding
the factors which affect the performance of an
accelerator is crucial for both architects and
programmers early in the design stage. Detailed models
can be highly accurate, but often require low-level
details which are not available until late in the
design cycle. In contrast, simple analytical models can
provide useful insights by abstracting away low-level
system details. In this paper, we propose LogCA---a
high-level performance model for hardware accelerators.
LogCA helps both programmers and architects identify
performance bounds and design bottlenecks early in the
design cycle, and provide insight into which
optimizations may alleviate these bottlenecks. We
validate our model across a variety of kernels, ranging
from sub-linear to super-linear complexities on both
on-chip and off-chip accelerators. We also describe the
utility of LogCA using two retrospective case studies.
First, we discuss the evolution of interface design in
SUN/Oracle's encryption accelerators. Second, we
discuss the evolution of memory interface design in
three different GPU architectures. In both cases, we
show that the adopted design optimizations for these
machines are similar to LogCA's suggested
optimizations. We argue that architects and programmers
can use insights from these retrospective studies for
improving future designs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Prabhakar:2017:PRA,
author = "Raghu Prabhakar and Yaqi Zhang and David Koeplinger
and Matt Feldman and Tian Zhao and Stefan Hadjis and
Ardavan Pedram and Christos Kozyrakis and Kunle
Olukotun",
title = "{Plasticine}: a Reconfigurable Architecture For
Parallel Patterns",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "389--402",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080256",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Reconfigurable architectures have gained popularity in
recent years as they allow the design of
energy-efficient accelerators. Fine-grain fabrics (e.g.
FPGAs) have traditionally suffered from performance and
power inefficiencies due to bit-level reconfigurable
abstractions. Both fine-grain and coarse-grain
architectures (e.g. CGRAs) traditionally require low
level programming and suffer from long compilation
times. We address both challenges with Plasticine, a
new spatially reconfigurable architecture designed to
efficiently execute applications composed of parallel
patterns. Parallel patterns have emerged from recent
research on parallel programming as powerful,
high-level abstractions that can elegantly capture data
locality, memory access patterns, and parallelism
across a wide range of dense and sparse applications.
We motivate Plasticine by first observing key
application characteristics captured by parallel
patterns that are amenable to hardware acceleration,
such as hierarchical parallelism, data locality, memory
access patterns, and control flow. Based on these
observations, we architect Plasticine as a collection
of Pattern Compute Units and Pattern Memory Units.
Pattern Compute Units are multi-stage pipelines of
reconfigurable SIMD functional units that can
efficiently execute nested patterns. Data locality is
exploited in Pattern Memory Units using banked
scratchpad memories and configurable address decoders.
Multiple on-chip address generators and scatter-gather
engines make efficient use of DRAM bandwidth by
supporting a large number of outstanding memory
requests, memory coalescing, and burst mode for dense
accesses. Plasticine has an area footprint of 113 mm$^2$
in a 28nm process, and consumes a maximum power of 49 W
at a 1 GHz clock. Using a cycle-accurate simulator, we
demonstrate that Plasticine provides an improvement of
up to 76.9x in performance-per-Watt over a conventional
FPGA over a wide range of dense and sparse
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kung:2017:PHA,
author = "Jaeha Kung and Yun Long and Duckhwan Kim and Saibal
Mukhopadhyay",
title = "A Programmable Hardware Accelerator for Simulating
Dynamical Systems",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "403--415",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080252",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The fast and energy-efficient simulation of dynamical
systems defined by coupled ordinary/partial
differential equations has emerged as an important
problem. The accelerated simulation of coupled ODE/PDE
is critical for analysis of physical systems as well as
computing with dynamical systems. This paper presents a
fast and programmable accelerator for simulating
dynamical systems. The computing model of the proposed
platform is based on multilayer cellular nonlinear
network (CeNN) augmented with nonlinear function
evaluation engines. The platform can be programmed to
accelerate wide classes of ODEs/PDEs by modulating the
connectivity within the multilayer CeNN engine. An
innovative hardware architecture including data reuse,
memory hierarchy, and near-memory processing is
designed to accelerate the augmented multilayer CeNN. A
dataflow model is presented which is supported by
optimized memory hierarchy for efficient function
evaluation. The proposed solver is designed and
synthesized in 15nm technology for the hardware
analysis. The performance is evaluated and compared to
GPU nodes when solving wide classes of differential
equations and the power consumption is analyzed to show
orders of magnitude improvement in energy efficiency.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Nowatzki:2017:SDA,
author = "Tony Nowatzki and Vinay Gangadhar and Newsha Ardalani
and Karthikeyan Sankaralingam",
title = "Stream-Dataflow Acceleration",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "416--429",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080255",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Demand for low-power data processing hardware
continues to rise inexorably. Existing programmable and
``general purpose'' solutions (e.g., SIMD, GPGPUs) are
insufficient, as evidenced by the order-of-magnitude
improvements and industry adoption of application and
domain-specific accelerators in important areas like
machine learning, computer vision and big data. The
stark tradeoffs between efficiency and generality at
these two extremes pose a difficult question: how
could domain-specific hardware efficiency be achieved
without domain-specific hardware solutions? In this
work, we rely on the insight that ``acceleratable''
algorithms have broad common properties: high
computational intensity with long phases, simple
control patterns and dependences, and simple streaming
memory access and reuse patterns. We define a general
architecture (a hardware-software interface) which can
more efficiently express programs with these
properties called stream-dataflow. The dataflow
component of this architecture enables high
concurrency, and the stream component enables
communication and coordination at very-low power and
area overhead. This paper explores the hardware and
software implications, describes its detailed
microarchitecture, and evaluates an implementation.
Compared to a state-of-the-art domain specific
accelerator (DianNao), and fixed-function accelerators
for MachSuite, Softbrain can match their performance
with only 2x power overhead on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yan:2017:HTC,
author = "Zi Yan and J{\'a}n Vesel{\'y} and Guilherme Cox and
Abhishek Bhattacharjee",
title = "Hardware Translation Coherence for Virtualized
Systems",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "430--443",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080211",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "To improve system performance, operating systems
(OSes) often undertake activities that require
modification of virtual-to-physical address
translations. For example, the OS may migrate data
between physical pages to manage heterogeneous memory
devices. We refer to such activities as page
remappings. Unfortunately, page remappings are
expensive. We show that a big part of this cost arises
from address translation coherence, particularly on
systems employing virtualization. In response, we
propose hardware translation invalidation and coherence
or HATRIC, a readily implementable hardware mechanism
to piggyback translation coherence atop existing cache
coherence protocols. We perform detailed studies using
KVM-based virtualization, showing that HATRIC achieves
up to 30\% performance and 10\% energy benefits, for
per-CPU area overheads of 0.2\%. We also quantify
HATRIC's benefits on systems running Xen and find up to
33\% performance improvements.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Park:2017:HTC,
author = "Chang Hyun Park and Taekyung Heo and Jungi Jeong and
Jaehyuk Huh",
title = "Hybrid {TLB} Coalescing: Improving {TLB} Translation
Coverage under Diverse Fragmented Memory Allocations",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "444--456",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080217",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "To mitigate excessive TLB misses in large memory
applications, techniques such as large pages, variable
length segments, and HW coalescing, increase the
coverage of limited hardware translation entries by
exploiting the contiguous memory allocation. However,
recent studies show that in non-uniform memory systems,
using large pages often leads to performance
degradation, or allocating large chunks of memory
becomes more difficult due to memory fragmentation.
Although each of the prior techniques favors its own
best chunk size, diverse contiguity of memory
allocation in real systems cannot always provide the
optimal chunk of each technique. Under such fragmented
and diverse memory allocations, this paper proposes a
novel HW-SW hybrid translation architecture, which can
adapt to different memory mappings efficiently. In the
proposed hybrid coalescing technique, the operating
system encodes memory contiguity information in a
subset of page table entries, called anchor entries.
During address translation through TLBs, an anchor
entry provides translation for contiguous pages
following the anchor entry. As a smaller number of
anchor entries can cover a large portion of virtual
address space, the efficiency of TLB can be
significantly improved. The most important benefit of
hybrid coalescing is its ability to change the coverage
of the anchor entry dynamically, reflecting the current
allocation contiguity status. By using the contiguity
information directly set by the operating system, the
technique can provide scalable translation coverage
improvements with minor hardware changes, while
allowing the flexibility of memory allocation. Our
experimental results show that across diverse
allocation scenarios with different distributions of
contiguous memory chunks, the proposed scheme can
effectively reap the potential translation coverage
improvement from the existing contiguity.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Alam:2017:DIY,
author = "Hanna Alam and Tianhao Zhang and Mattan Erez and Yoav
Etsion",
title = "Do-It-Yourself Virtual Memory Translation",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "457--468",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080209",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "In this paper, we introduce the Do-It-Yourself virtual
memory translation (DVMT) architecture as a flexible
complement for current hardware-fixed translation
flows. DVMT decouples the virtual-to-physical mapping
process from the access permissions, giving
applications freedom in choosing mapping schemes, while
maintaining security within the operating system.
Furthermore, DVMT is designed to support virtualized
environments, as a means to collapse the costly,
hardware-assisted two-dimensional translations. We
describe the architecture in detail and demonstrate its
effectiveness by evaluating several different DVMT
schemes on a range of virtualized applications with a
model based on measurements from a commercial system.
We show that different DVMT configurations preserve the
native performance, while achieving speedups of 1.2x to
2.0x in virtualized environments.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ryoo:2017:RTD,
author = "Jee Ho Ryoo and Nagendra Gulur and Shuang Song and
Lizy K. John",
title = "Rethinking {TLB} Designs in Virtualized Environments:
a Very Large Part-of-Memory {TLB}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "469--480",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080210",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "With increasing deployment of virtual machines for
cloud services and server applications, memory address
translation overheads in virtualized environments have
received great attention. In the radix-4 type of page
tables used in x86 architectures, a TLB-miss
necessitates up to 24 memory references for one guest
to host translation. While dedicated page walk caches
and such recent enhancements eliminate many of these
memory references, our measurements on the Intel
Skylake processors indicate that many programs in
virtualized mode of execution still spend hundreds of
cycles for translations that do not hit in the TLBs.
This paper presents an innovative scheme to reduce the
cost of address translations by using a very large
Translation Lookaside Buffer that is part of memory,
the POM-TLB. In the POM-TLB, only one access is
required instead of up to 24 accesses required in
commonly used 2D walks with radix-4 type of page
tables. Even if many of the 24 accesses may hit in the
page walk caches, the aggregated cost of the many hits
plus the overhead of occasional misses from page walk
caches still exceeds the cost of one access to the
POM-TLB. Since the POM-TLB is part of the memory space,
TLB entries (as opposed to multiple page table entries)
can be cached in large L2 and L3 data caches, yielding
significant benefits. Through detailed evaluation
running SPEC, PARSEC and graph workloads, we
demonstrate that the proposed POM-TLB improves
performance by approximately 10\% on average. The
improvement is more than 16\% for 5 of the benchmarks.
It is further seen that a POM-TLB of 16MB size can
eliminate nearly all TLB misses in 8-core systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kolli:2017:LLP,
author = "Aasheesh Kolli and Vaibhav Gogte and Ali Saidi and
Stephan Diestelhorst and Peter M. Chen and Satish
Narayanasamy and Thomas F. Wenisch",
title = "Language-Level Persistency",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "481--493",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080229",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The commercial release of byte-addressable persistent
memories, such as Intel/Micron 3D XPoint memory, is
imminent. Ongoing research has sought mechanisms to
allow programmers to implement recoverable data
structures in these new main memories. Ensuring
recoverability requires programmer control of the order
of persistent stores; recent work proposes persistency
models as an extension to memory consistency to specify
such ordering. Prior work has considered persistency
models at the abstraction of the instruction set
architecture. Instead, we argue for extending the
language-level memory model to provide guarantees on
the order of persistent writes. We explore a taxonomy
of guarantees a language-level persistency model might
provide, considering both atomicity and ordering
constraints on groups of persistent stores. Then, we
propose and evaluate Acquire-Release Persistency (ARP),
a language-level persistency model for C++11. We
describe how to compile code written for ARP to a
state-of-the-art ISA-level persistency model. We then
consider enhancements to the ISA-level persistency
model that can distinguish memory consistency
constraints required for proper synchronization but
unnecessary for correct recovery. With these
optimizations, we show that ARP increases performance
by up to 33.2\% (19.8\% avg.) over coding directly to
the baseline ISA-level persistency model for a suite of
persistent-write-intensive workloads.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Choi:2017:SAS,
author = "Jiho Choi and Thomas Shull and Maria J. Garzaran and
Josep Torrellas",
title = "{ShortCut}: Architectural Support for Fast Object
Access in Scripting Languages",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "494--506",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080237",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The same flexibility that makes dynamic scripting
languages appealing to programmers is also the primary
cause of their low performance. To access objects of
potentially different types, the compiler creates a
dispatcher with a series of if statements, each
performing a comparison to a type and a jump to a
handler. This induces major overhead in instructions
executed and branches mispredicted. This paper proposes
architectural support to significantly improve the
efficiency of accesses to objects. The idea is to
modify the instruction that calls the dispatcher so
that, under most conditions, it skips most of the
branches and instructions needed to reach the correct
handler, and sometimes even the execution of the
handler itself. Our novel architecture, called
ShortCut, performs two levels of optimization. Its
Plain design transforms the call to the dispatcher into
a call to the correct handler --- bypassing the whole
dispatcher execution. Its Aggressive design transforms
the call to the dispatcher into a simple load or store
--- bypassing the execution of both dispatcher and
handler. We implement the ShortCut software in the
state-of-the-art Google V8 JIT compiler, and the
ShortCut hardware in a simulator. We evaluate ShortCut
with the Octane and SunSpider JavaScript application
suites. Plain ShortCut reduces the average execution
time of the applications by 30\% running under the
baseline compiler, and by 11\% running under the
maximum level of compiler optimization. Aggressive
ShortCut performs only slightly better.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Gope:2017:ASS,
author = "Dibakar Gope and David J. Schlais and Mikko H.
Lipasti",
title = "Architectural Support for Server-Side {PHP}
Processing",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "507--520",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080234",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "PHP is the dominant server-side scripting language
used to implement dynamic web content. Just-in-time
compilation, as implemented in Facebook's
state-of-the-art HipHopVM, helps mitigate the poor
performance of PHP, but substantial overheads remain,
especially for realistic, large-scale PHP applications.
This paper analyzes such applications and shows that
there is little opportunity for conventional
microarchitectural enhancements. Furthermore, prior
approaches for function-level hardware acceleration
present many challenges due to the extremely flat
distribution of execution time across a large number of
functions in these complex applications. In-depth
analysis reveals a more promising alternative: targeted
acceleration of four fine-grained PHP activities: hash
table accesses, heap management, string manipulation,
and regular expression handling. We highlight a set of
guiding principles and then propose and evaluate
inexpensive hardware accelerators for these activities
that accrue substantial performance and energy gains
across dozens of functions. Our results reflect an
average 17.93\% improvement in performance and 21.01\%
reduction in energy while executing these complex PHP
workloads on a state-of-the-art software and hardware
platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kannan:2017:HDH,
author = "Sudarsun Kannan and Ada Gavrilovska and Vishal Gupta
and Karsten Schwan",
title = "{HeteroOS}: {OS} Design for Heterogeneous Memory
Management in Datacenter",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "521--534",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080245",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Heterogeneous memory management combined with server
virtualization in datacenters is expected to increase
the software and OS management complexity.
State-of-the-art solutions rely exclusively on the
hypervisor (VMM) for expensive page hotness tracking
and migrations, limiting the benefits from
heterogeneity. To address this, we design HeteroOS, a
novel application-transparent OS-level solution for
managing memory heterogeneity in virtualized system.
The HeteroOS design first makes the guest-OSes
heterogeneity-aware and then extracts rich OS-level
information about applications' memory usage to place
data in the `right' memory avoiding page migrations.
When such pro-active placements are not possible,
HeteroOS combines the power of the guest-OSes'
information about applications with the VMM's hardware
control to track for hotness and migrate only
performance-critical pages. Finally, HeteroOS also
designs an efficient heterogeneous memory sharing
across multiple guest-VMs. Evaluation of HeteroOS with
memory, storage, and network-intensive datacenter
applications shows up to 2x performance improvement
compared to the state-of-the-art VMM-exclusive
approach.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Shen:2017:MCA,
author = "Yongming Shen and Michael Ferdman and Peter Milder",
title = "Maximizing {CNN} Accelerator Efficiency Through
Resource Partitioning",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "535--547",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080221",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Convolutional neural networks (CNNs) are
revolutionizing machine learning, but they present
significant computational challenges. Recently, many
FPGA-based accelerators have been proposed to improve
the performance and efficiency of CNNs. Current
approaches construct a single processor that computes
the CNN layers one at a time; the processor is
optimized to maximize the throughput at which the
collection of layers is computed. However, this
approach leads to inefficient designs because the same
processor structure is used to compute CNN layers of
radically varying dimensions. We present a new CNN
accelerator paradigm and an accompanying automated
design methodology that partitions the available FPGA
resources into multiple processors, each of which is
tailored for a different subset of the CNN
convolutional layers. Using the same FPGA resources as
a single large processor, multiple smaller specialized
processors increase computational efficiency and lead
to a higher overall throughput. Our design methodology
achieves 3.8x higher throughput than the
state-of-the-art approach on evaluating the popular
AlexNet CNN on a Xilinx Virtex-7 FPGA. For the more
recent SqueezeNet and GoogLeNet, the speedups are 2.2x
and 2.0x.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Yu:2017:SCD,
author = "Jiecao Yu and Andrew Lukefahr and David Palframan and
Ganesh Dasika and Reetuparna Das and Scott Mahlke",
title = "{Scalpel}: Customizing {DNN} Pruning to the Underlying
Hardware Parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "548--560",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080215",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "As the size of Deep Neural Networks (DNNs) continues
to grow to increase accuracy and solve more complex
problems, their energy footprint also scales. Weight
pruning reduces DNN model size and the computation by
removing redundant weights. However, we implemented
weight pruning for several popular networks on a
variety of hardware platforms and observed surprising
results. For many networks, the network sparsity caused
by weight pruning will actually hurt the overall
performance despite large reductions in the model size
and required multiply-accumulate operations. Also,
encoding the sparse format of pruned networks incurs
additional storage space overhead. To overcome these
challenges, we propose Scalpel that customizes DNN
pruning to the underlying hardware by matching the
pruned network structure to the data-parallel hardware
organization. Scalpel consists of two techniques:
SIMD-aware weight pruning and node pruning. For
low-parallelism hardware (e.g., microcontroller),
SIMD-aware weight pruning maintains weights in aligned
fixed-size groups to fully utilize the SIMD units. For
high-parallelism hardware (e.g., GPU), node pruning
removes redundant nodes, not redundant weights, thereby
reducing computation without sacrificing the dense
matrix format. For hardware with moderate parallelism
(e.g., desktop CPU), SIMD-aware weight pruning and node
pruning are synergistically applied together. Across
the microcontroller, CPU and GPU, Scalpel achieves mean
speedups of 3.54x, 2.61x, and 1.25x while reducing the
model sizes by 88\%, 82\%, and 53\%. In comparison,
traditional weight pruning achieves mean speedups of
1.90x, 1.06x, 0.41x across the three platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Sa:2017:UOA,
author = "Christopher {De Sa} and Matthew Feldman and
Christopher R{\'e} and Kunle Olukotun",
title = "Understanding and Optimizing Asynchronous
Low-Precision Stochastic Gradient Descent",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "561--574",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080248",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Stochastic gradient descent (SGD) is one of the most
popular numerical algorithms used in machine learning
and other domains. Since this is likely to continue for
the foreseeable future, it is important to study
techniques that can make it run fast on parallel
hardware. In this paper, we provide the first analysis
of a technique called Buck-wild! that uses both
asynchronous execution and low-precision computation.
We introduce the DMGC model, the first
conceptualization of the parameter space that exists
when implementing low-precision SGD, and show that it
provides a way to both classify these algorithms and
model their performance. We leverage this insight to
propose and analyze techniques to improve the speed of
low-precision SGD. First, we propose software
optimizations that can increase throughput on existing
CPUs by up to 11X. Second, we propose architectural
changes, including a new cache technique we call an
obstinate cache, that increase throughput beyond the
limits of current-generation hardware. We also
implement and analyze low-precision SGD on the FPGA,
which is a promising alternative to the CPU for future
SGD systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Li:2017:API,
author = "Zhaoshi Li and Leibo Liu and Yangdong Deng and Shouyi
Yin and Yao Wang and Shaojun Wei",
title = "Aggressive Pipelining of Irregular Applications on
Reconfigurable Hardware",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "575--586",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080228",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "CPU-FPGA heterogeneous platforms offer a promising
solution for high-performance and energy-efficient
computing systems by providing specialized accelerators
with post-silicon reconfigurability. To unleash the
power of FPGA, however, the programmability gap has to
be filled so that applications specified in high-level
programming languages can be efficiently mapped and
scheduled on FPGA. The above problem is even more
challenging for irregular applications, in which the
execution dependency can only be determined at run
time. Thus over-serialized accelerators are generated
from existing works that rely on compile time analysis
to schedule the computation. In this work, we propose a
comprehensive software-hardware co-design framework,
which captures parallelism in irregular applications
and aggressively schedules pipelined execution on
reconfigurable platform. Based on an inherently
parallel abstraction packaging parallelism for runtime
schedule, our framework significantly differs from
existing works that tend to schedule executions at
compile time. An irregular application is formulated as
a set of tasks with their dependencies specified as
rules describing the conditions under which a subset of
tasks can be executed concurrently. Then datapaths on
FPGA will be generated by transforming applications in
the formulation into task pipelines orchestrated by
evaluating rules at runtime, which could exploit
fine-grained pipeline parallelism as handcrafted
accelerators do. An evaluation shows that this
framework is able to produce datapath with its quality
close to handcrafted designs. Experiments show that
generated accelerators are dramatically more efficient
than those created by current high-level synthesis
tools. Meanwhile, accelerators generated for a set of
irregular applications attain 0.5x--1.9x performance
compared to equivalent software implementations we
selected on a server-grade 10-core processor, with the
memory subsystem remaining as the bottleneck.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Subramanian:2017:FEM,
author = "Suvinay Subramanian and Mark C. Jeffrey and Maleen
Abeydeera and Hyun Ryong Lee and Victor A. Ying and
Joel Emer and Daniel Sanchez",
title = "Fractal: an Execution Model for Fine-Grain Nested
Speculative Parallelism",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "587--599",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080218",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Most systems that support speculative parallelization,
like hardware transactional memory (HTM), do not
support nested parallelism. This sacrifices substantial
parallelism and precludes composing parallel
algorithms. And the few HTMs that do support nested
parallelism focus on parallelizing at the coarsest
(shallowest) levels, incurring large overheads that
squander most of their potential. We present FRACTAL, a
new execution model that supports unordered and
timestamp-ordered nested parallelism. FRACTAL lets
programmers seamlessly compose speculative parallel
algorithms, and lets the architecture exploit
parallelism at all levels. FRACTAL can parallelize a
broader range of applications than prior speculative
execution models. We design a FRACTAL implementation
that extends the Swarm architecture and focuses on
parallelizing at the finest (deepest) levels. Our
approach sidesteps the issues of nested parallel HTMs
and uncovers abundant fine-grain parallelism. As a
result, FRACTAL outperforms prior speculative
architectures by up to 88x at 256 cores.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Subramaniyan:2017:PAP,
author = "Arun Subramaniyan and Reetuparna Das",
title = "Parallel Automata Processor",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "600--612",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080207",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Finite State Machines (FSM) are widely used
computation models for many application domains. These
embarrassingly sequential applications with irregular
memory access patterns perform poorly on conventional
von-Neumann architectures. The Micron Automata
Processor (AP) is an in-situ memory-based computational
architecture that accelerates non-deterministic finite
automata (NFA) processing in hardware. However, each
FSM on the AP is processed sequentially, limiting
potential speedups. In this paper, we explore the FSM
parallelization problem in the context of the AP.
Extending classical parallelization techniques to NFAs
executing on AP is non-trivial because of high
state-transition tracking overheads and exponential
computation complexity. We present the associated
challenges and propose solutions that leverage both the
unique properties of the NFAs (connected components,
input symbol ranges, convergence, common parent states)
and unique features in the AP (support for simultaneous
transitions, low-overhead flow switching, state vector
cache) to realize parallel NFA execution on the AP. We
evaluate our techniques against several important
benchmarks including NFAs used for network intrusion
detection, malware detection, text processing, protein
motif searching, DNA sequencing, and data analytics.
Our proposed parallelization scheme demonstrates
significant speedup (25.5x on average) compared to
sequential execution on AP. Prior work has already
shown that sequential execution on AP is at least an
order of magnitude better than GPUs, multi-core
processors and Xeon Phi accelerator.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Kateja:2017:VDB,
author = "Rajat Kateja and Anirudh Badam and Sriram Govindan and
Bikash Sharma and Greg Ganger",
title = "{Viyojit}: Decoupling Battery and {DRAM} Capacities
for Battery-Backed {DRAM}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "613--626",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080236",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Non-Volatile Memories (NVMs) can significantly improve
the performance of data-intensive applications. A
popular form of NVM is Battery-backed DRAM, which is
available and in use today with DRAMs latency and
without the endurance problems of emerging NVM
technologies. Modern servers can be provisioned with
up-to 4 TB of DRAM, and provisioning battery backup to
write out such large memories is hard because of the
large battery sizes and the added hardware and cooling
costs. We present Viyojit, a system that exploits the
skew in write working sets of applications to provision
substantially smaller batteries while still ensuring
durability for the entire DRAM capacity. Viyojit
achieves this by bounding the number of dirty pages in
DRAM based on the provisioned battery capacity and
proactively writing out infrequently written pages to
an SSD. Even for write-heavy workloads with less skew
than we observe in analysis of real data center traces,
Viyojit reduces the required battery capacity to 11\%
of the original size, with a performance overhead of
7--25\%. Thus, Viyojit frees battery-backed DRAM from
stunted growth of battery capacities and enables
servers with terabytes of battery-backed DRAM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Young:2017:DCD,
author = "Vinson Young and Prashant J. Nair and Moinuddin K.
Qureshi",
title = "{DICE}: Compressing {DRAM} Caches for Bandwidth and
Capacity",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "627--638",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080243",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper investigates compression for DRAM caches.
As the capacity of DRAM cache is typically large, prior
techniques on cache compression, which solely focus on
improving cache capacity, provide only a marginal
benefit. We show that more performance benefit can be
obtained if the compression of the DRAM cache is
tailored to provide higher bandwidth. If a DRAM cache
can provide two compressed lines in a single access,
and both lines are useful, the effective bandwidth of
the DRAM cache would double. Unfortunately, it is not
straight-forward to compress DRAM caches for bandwidth.
The typically used Traditional Set Indexing (TSI) maps
consecutive lines to consecutive sets, so the multiple
compressed lines obtained from the set are from
spatially distant locations and unlikely to be used
within a short period of each other. We can change the
indexing of the cache to place consecutive lines in the
same set to improve bandwidth; however, when the data
is incompressible, such spatial indexing reduces
effective capacity and causes significant slowdown.
Ideally, we would like to have spatial indexing when
the data is compressible and TSI otherwise. To this
end, we propose Dynamic-Indexing Cache comprEssion
(DICE), a dynamic design that can adapt between spatial
indexing and TSI, depending on the compressibility of
the data. We also propose low-cost Cache Index
Predictors (CIP) that can accurately predict the cache
indexing scheme on access in order to avoid probing
both indices for retrieving a given cache line. Our
studies with a 1GB DRAM cache, on a wide range of
workloads (including SPEC and Graph), show that DICE
improves performance by 19.0\% and reduces
energy-delay-product by 36\% on average. DICE is within
3\% of a design that has double the capacity and double
the bandwidth. DICE incurs a storage overhead of less
than 1KB and does not rely on any OS support.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Drumond:2017:MDE,
author = "Mario Drumond and Alexandros Daglis and Nooshin
Mirzadeh and Dmitrii Ustiugov and Javier Picorel and
Babak Falsafi and Boris Grot and Dionisios
Pnevmatikatos",
title = "The {Mondrian Data Engine}",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "639--651",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080233",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The increasing demand for extracting value out of
ever-growing data poses an ongoing challenge to system
designers, a task only made trickier by the end of
Dennard scaling. As the performance density of
traditional CPU-centric architectures stagnates,
advancing compute capabilities necessitates novel
architectural approaches. Near-memory processing (NMP)
architectures are reemerging as promising candidates to
improve computing efficiency through tight coupling of
logic and memory. NMP architectures are especially
fitting for data analytics, as they provide immense
bandwidth to memory-resident data and dramatically
reduce data movement, the main source of energy
consumption. Modern data analytics operators are
optimized for CPU execution and hence rely on large
caches and employ random memory accesses. In the
context of NMP, such random accesses result in wasteful
DRAM row buffer activations that account for a
significant fraction of the total memory access energy.
In addition, utilizing NMP's ample bandwidth with
fine-grained random accesses requires complex hardware
that cannot be accommodated under NMP's tight area and
power constraints. Our thesis is that efficient NMP
calls for an algorithm-hardware co-design that favors
algorithms with sequential accesses to enable simple
hardware that accesses memory in streams. We introduce
an instance of such a co-designed NMP architecture for
data analytics, the Mondrian Data Engine. Compared to a
CPU-centric and a baseline NMP system, the Mondrian
Data Engine improves the performance of basic data
analytics operators by up to 49x and 5x, and efficiency
by up to 28x and 5x, respectively.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Tsai:2017:JSD,
author = "Po-An Tsai and Nathan Beckmann and Daniel Sanchez",
title = "{Jenga}: Software-Defined Cache Hierarchies",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "652--665",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080214",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Caches are traditionally organized as a rigid
hierarchy, with multiple levels of progressively larger
and slower memories. Hierarchy allows a simple, fixed
design to benefit a wide range of applications, since
working sets settle at the smallest (i.e., fastest and
most energy-efficient) level they fit in. However,
rigid hierarchies also add overheads, because each
level adds latency and energy even when it does not fit
the working set. These overheads are expensive on
emerging systems with heterogeneous memories, where the
differences in latency and energy across levels are
small. Significant gains are possible by specializing
the hierarchy to applications. We propose Jenga, a
reconfigurable cache hierarchy that dynamically and
transparently specializes itself to applications. Jenga
builds virtual cache hierarchies out of heterogeneous,
distributed cache banks using simple hardware
mechanisms and an OS runtime. In contrast to prior
techniques that trade energy and bandwidth for
performance (e.g., dynamic bypassing or prefetching),
Jenga eliminates accesses to unwanted cache levels.
Jenga thus improves both performance and energy
efficiency. On a 36-core chip with a 1 GB DRAM cache,
Jenga improves energy-delay product over a combination
of state-of-the-art techniques by 23\% on average and
by up to 85\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Boyapati:2017:AND,
author = "Rahul Boyapati and Jiayi Huang and Pritam Majumder and
Ki Hwan Yum and Eun Jung Kim",
title = "{APPROX-NoC}: a Data Approximation Framework for
Network-On-Chip Architectures",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "666--677",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080241",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "The trend of unsustainable power consumption and large
memory bandwidth demands in massively parallel
multicore systems, with the advent of the big data era,
has brought upon the onset of alternate computation
paradigms utilizing heterogeneity, specialization,
processor-in-memory and approximation. Approximate
Computing is being touted as a viable solution for high
performance computation by relaxing the accuracy
constraints of applications. This trend has been
accentuated by emerging data intensive applications in
domains like image/video processing, machine learning
and big data analytics that allow inaccurate outputs
within an acceptable variance. Leveraging relaxed
accuracy for high throughput in Networks-on-Chip
(NoCs), which have rapidly become the accepted method
for connecting a large number of on-chip components,
has not yet been explored. We propose APPROX-NoC, a
hardware data approximation framework with an online
data error control mechanism for high performance NoCs.
APPROX-NoC facilitates approximate matching of data
patterns, within a controllable value range, to
compress them thereby reducing the volume of data
movement across the chip. Our evaluation shows that
APPROX-NoC achieves on average up to 9\% latency
reduction and 60\% throughput improvement compared with
state-of-the-art NoC data compression mechanisms, while
maintaining low application error. Additionally, with a
data intensive graph processing application we achieve
a 36.7\% latency reduction compared to state-of-the-art
compression mechanisms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Poremba:2017:TBA,
author = "Matthew Poremba and Itir Akgun and Jieming Yin and
Onur Kayiran and Yuan Xie and Gabriel H. Loh",
title = "There and Back Again: Optimizing the Interconnect in
Networks of Memory Cubes",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "678--690",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080251",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "High-performance computing, enterprise, and datacenter
servers are driving demands for higher total memory
capacity as well as memory performance. Memory
``cubes'' with high per-package capacity (from 3D
integration) along with high-speed point-to-point
interconnects provide a scalable memory system
architecture with the potential to deliver both
capacity and performance. Multiple such cubes connected
together can form a ``Memory Network'' (MN), but the
design space for such MNs is quite vast, including
multiple topology types and multiple memory
technologies per memory cube. In this work, we first
analyze several MN topologies with different mixes of
memory package technologies to understand the key
tradeoffs and bottlenecks for such systems. We find
that most of a MN's performance challenges arise from
the interconnection network that binds the memory cubes
together. In particular, arbitration schemes used to
route through MNs, ratio of NVM to DRAM, and specific
topologies used have dramatic impact on performance and
energy results. Our initial analysis indicates that
introducing non-volatile memory to the MN presents a
unique tradeoff between memory array latency and
network latency. We observe that placing NVM cubes in a
specific order in the MN improves performance by
reducing the network size/diameter up to a certain NVM
to DRAM ratio. Novel MN topologies and arbitration
schemes also provide performance and energy deltas by
reducing the hop count of requests and response in the
MN. Based on our analyses, we introduce three
techniques to address MN latency issues: (1)
Distance-based arbitration scheme to improve queuing
latencies throughout the network, (2) skip-list
topology, derived from the classic data structure, to
improve network latency and link usage, and (3) the
MetaCube, a denser memory cube that leverages advanced
packaging technologies to improve latency by reducing
MN size.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Fu:2017:FRR,
author = "Binzhang Fu and John Kim",
title = "{Footprint}: Regulating Routing Adaptiveness in
Networks-on-Chip",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "691--702",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080249",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Routing algorithms can improve network performance by
maximizing routing adaptiveness but can be problematic
in the presence of endpoint congestion. Tree-saturation
is a well-known behavior caused by endpoint congestion.
Adaptive routing can, however, spread the congestion
and result in thick branches of the congestion tree ---
creating Head-of-Line (HoL) blocking and degrading
performance. In this work, we identify how ignoring
virtual channels (VCs) and their occupancy during
adaptive routing results in congestion trees with thick
branches as congestion is spread to all VCs. To address
this limitation, we propose Footprint routing algorithm
--- a new adaptive routing algorithm that minimizes the
size of the congestion tree, both in terms of the
number of nodes in the congestion tree as well as
branch thickness. Footprint achieves this by regulating
adaptiveness by requiring packets to follow the path of
prior packets to the same destination if the network is
congested instead of forking a new path or VC. Thus,
the congestion tree is dynamically kept as slim as
possible and reduces HoL blocking or congestion
spreading while maintaining high adaptivity and
maximizing VC buffer utilization. We evaluate the
proposed Footprint routing algorithm against other
adaptive routing algorithms and our simulation results
show that the network saturation throughput can be
improved by up to 43\% (58\%) compared with the fully
adaptive routing (partially adaptive routing)
algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@Article{Ebrahimi:2017:ENT,
author = "Masoumeh Ebrahimi and Masoud Daneshtalab",
title = "{EbDa}: a New Theory on Design and Verification of
Deadlock-free Interconnection Networks",
journal = j-COMP-ARCH-NEWS,
volume = "45",
number = "2",
pages = "703--715",
month = may,
year = "2017",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/3140659.3080253",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Sep 15 11:09:14 MDT 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "Freedom from deadlock is one of the most important
issues when designing routing algorithms in
on-chip/off-chip networks. Many works have been
developed upon Dally's theory proving that a network is
deadlock-free if there is no cyclic dependency on the
channel dependency graph. However, finding such acyclic
graph has been very challenging, which limits Dally's
theory to networks with a low number of channels. In
this paper, we introduce three theorems that directly
lead to routing algorithms with an acyclic channel
dependency graph. We also propose the partitioning
methodology, enabling a design to reach the maximum
adaptiveness for the n-dimensional mesh and k-ary
n-cube topologies with any given number of channels. In
addition, deadlock-free routing algorithms can be
derived ranging from maximally fully adaptive routing
down to deterministic routing. The proposed theorems
can drastically remove the difficulties of designing
deadlock-free routing algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "https://dl.acm.org/loi/sigarch",
}
@InProceedings{Lipovski:1998:RBN,
author = "Jack Lipovski",
title = "Retrospective: {Banyan} networks for partitioning
multiprocessor systems",
crossref = "ACM:1998:PAI",
pages = "1--1",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dennis:1998:RPA,
author = "Jack B. Dennis",
title = "Retrospective: a preliminary architecture for a basic
data flow processor",
crossref = "ACM:1998:PAI",
pages = "2--4",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Patel:1998:RIT,
author = "Janak H. Patel",
title = "Retrospective: {Improving} the throughput of a
pipeline by insertion of delays",
crossref = "ACM:1998:PAI",
pages = "5--5",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Bell:1998:RWW,
author = "Gordon Bell and W. D. Strecker",
title = "Retrospective: {What} have we learned from the
{PDP-11} --- what we have learned from {VAX} and
{Alpha}",
crossref = "ACM:1998:PAI",
pages = "6--10",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Shustek:1998:RIT,
author = "Leonard J. Shustek and Bernard L. Peuto",
title = "Retrospective: an instruction timing model of {CPU}
performance",
crossref = "ACM:1998:PAI",
pages = "11--12",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Ditzel:1998:RRH,
author = "David R. Ditzel and David A. Patterson",
title = "Retrospective: a retrospective on high-level language
computer architecture",
crossref = "ACM:1998:PAI",
pages = "13--14",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Batcher:1998:RAM,
author = "Ken Batcher",
title = "Retrospective: {Architecture} of a massively parallel
processor",
crossref = "ACM:1998:PAI",
pages = "15--16",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Pier:1998:RPH,
author = "Ken Pier",
title = "Retrospective: a processor for a high-performance
personal computer",
crossref = "ACM:1998:PAI",
pages = "17--19",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Kroft:1998:RLF,
author = "David Kroft",
title = "Retrospective: {Lockup}-free instruction fetch\slash
prefetch cache organization",
crossref = "ACM:1998:PAI",
pages = "20--21",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:RSB,
author = "James E. Smith",
title = "Retrospective: a study of branch prediction
strategies",
crossref = "ACM:1998:PAI",
pages = "22--23",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Patterson:1998:RRR,
author = "David A. Patterson and Carlo H. S{\'e}quin",
title = "Retrospective: {RISC I}: a {Reduced Instruction Set
Computer}",
crossref = "ACM:1998:PAI",
pages = "24--26",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "This paper contains in column 1, page 25, the story of
the origin of the name ``RISC''.",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:RDA,
author = "James E. Smith",
title = "Retrospective: {Decoupled} access\slash execute
architectures",
crossref = "ACM:1998:PAI",
pages = "27--28",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Gottlieb:1998:RPR,
author = "Allan Gottlieb",
title = "Retrospective: a personal retrospective on the {NYU}
ultracomputer",
crossref = "ACM:1998:PAI",
pages = "29--31",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Goodman:1998:RUC,
author = "James R. Goodman",
title = "Retrospective: {Using} cache memory to reduce
processor-memory traffic",
crossref = "ACM:1998:PAI",
pages = "32--33",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Fisher:1998:RVL,
author = "Joseph A. Fisher",
title = "Retrospective: {Very} long instruction word
architectures and the {ELI}-512",
crossref = "ACM:1998:PAI",
pages = "34--36",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Emer:1998:RCP,
author = "Joel S. Emer and Douglas W. Clark",
title = "Retrospective: {Characterization} of processor
performance in the {VAX-11\slash 780}",
crossref = "ACM:1998:PAI",
pages = "37--38",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Patel:1998:RLO,
author = "Janak H. Patel",
title = "Retrospective: a low-overhead coherence solution for
multiprocessors with private cache memories",
crossref = "ACM:1998:PAI",
pages = "39--41",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:RIP,
author = "James E. Smith",
title = "Retrospective: {Implementing} precise interrupts in
pipelined processors",
crossref = "ACM:1998:PAI",
pages = "42--42",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Hwu:1998:RHH,
author = "Wen-mei W. Hwu and Yale N. Patt",
title = "Retrospective: {HPSm}, a high performance restricted
data flow architecture having minimal functionality",
crossref = "ACM:1998:PAI",
pages = "43--44",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Gross:1998:RRW,
author = "Thomas Gross and Monica Lam",
title = "Retrospective: a retrospective on the {Warp}
machines",
crossref = "ACM:1998:PAI",
pages = "45--47",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dubois:1998:RMA,
author = "Michel Dubois and Christoph Scheurich",
title = "Retrospective: {Memory} access buffering in
multiprocessors",
crossref = "ACM:1998:PAI",
pages = "48--50",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Sohi:1998:RII,
author = "Gurindar S. Sohi",
title = "Retrospective: {Instruction} issue logic for
high-performance, interruptible pipelined processors",
crossref = "ACM:1998:PAI",
pages = "51--53",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dally:1998:RJM,
author = "William J. Dally and Andrew Chien and Stuart Fiske and
Waldemar Horwat and Richard Lethin and Michael Noakes
and Peter Nuth and Ellen Spertus and Deborah Wallach
and D. Scott Wills and Andrew Chang and John Keen",
title = "Retrospective: {The} {J}-machine",
crossref = "ACM:1998:PAI",
pages = "54--58",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Baer:1998:RIP,
author = "Jean-Loup Baer and Wen-Hann Wang",
title = "Retrospective: {On} the inclusion properties for
multi-level cache hierarchies",
crossref = "ACM:1998:PAI",
pages = "59--60",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Hennessy:1998:RED,
author = "John Hennessy",
title = "Retrospective: {Evaluation} of directory schemes for
cache coherence",
crossref = "ACM:1998:PAI",
pages = "61--62",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Adve:1998:RWO,
author = "Sarita V. Adve and Mark D. Hill",
title = "Retrospective: {Weak} ordering --- a new definition",
crossref = "ACM:1998:PAI",
pages = "63--66",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Gharachorloo:1998:RMC,
author = "Kourosh Gharachorloo",
title = "Retrospective: {Memory} consistency and event ordering
in scalable shared-memory multiprocessors",
crossref = "ACM:1998:PAI",
pages = "67--70",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Jouppi:1998:RID,
author = "Norman P. Jouppi",
title = "Retrospective: {Improving} direct-mapped cache
performance by the addition of a small
fully-associative cache and prefetch buffers",
crossref = "ACM:1998:PAI",
pages = "71--73",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Papadopoulos:1998:RME,
author = "Gregory M. Papadopoulos and David E. Culler",
title = "Retrospective: {Monsoon}: an explicit token-store
architecture",
crossref = "ACM:1998:PAI",
pages = "74--76",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Hwu:1998:RIA,
author = "Wen-mei W. Hwu",
title = "Retrospective: {Impact}: an architectural framework
for multiple-instruction issue",
crossref = "ACM:1998:PAI",
pages = "77--79",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Lenoski:1998:RDP,
author = "Daniel E. Lenoski and James P. Laudon",
title = "Retrospective: {The} {DASH} prototype: implementation
and performance",
crossref = "ACM:1998:PAI",
pages = "80--82",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{vonEicken:1998:RAM,
author = "Thorsten von Eicken and David E. Culler and Klaus Erik
Schauser and Seth Copen Goldstein",
title = "Retrospective: {Active} messages: a mechanism for
integrating computation and communication",
crossref = "ACM:1998:PAI",
pages = "83--84",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Ni:1998:RTM,
author = "Lionel Ni",
title = "Retrospective: {The} turn model for adaptive routing",
crossref = "ACM:1998:PAI",
pages = "85--86",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Yeh:1998:RAI,
author = "Tse-Yu Yeh and Yale N. Patt",
title = "Retrospective: {Alternative} implementations of
two-level adaptive training branch prediction",
crossref = "ACM:1998:PAI",
pages = "87--88",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Veidenbaum:1998:RCS,
author = "A. Veidenbaum and P.-C. Yew and D. J. Kuck and C. D.
Polychronopoulos and D. H. Padua and E. S. Davidson and
K. Gallivan",
title = "Retrospective: {The} {Cedar} system",
crossref = "ACM:1998:PAI",
pages = "89--91",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Blumrich:1998:RVM,
author = "Matthias A. Blumrich and Kai Li and Richard D. Alpert
and Cezary Dubnicki and Edward W. Felten and Jonathan
Sandberg",
title = "Retrospective: {Virtual} memory mapped network
interface for the {SHRIMP} multicomputer",
crossref = "ACM:1998:PAI",
pages = "92--94",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Kuskin:1998:RSF,
author = "Jeffrey S. Kuskin",
title = "Retrospective: {The} {Stanford FLASH} multiprocessor",
crossref = "ACM:1998:PAI",
pages = "95--97",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Reinhardt:1998:RTT,
author = "Steven K. Reinhardt and James R. Larus and David A.
Wood",
title = "Retrospective: {Tempest} and {Typhoon}: user-level
shared memory",
crossref = "ACM:1998:PAI",
pages = "98--102",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Agarwal:1998:RAM,
author = "Anant Agarwal",
title = "Retrospective: {The} {MIT Alewife} machine:
architecture and performance",
crossref = "ACM:1998:PAI",
pages = "103--110",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Sohi:1998:RMP,
author = "Gurindar Sohi",
title = "Retrospective: {Multiscalar} processors",
crossref = "ACM:1998:PAI",
pages = "111--114",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Tullsen:1998:RSM,
author = "Dean M. Tullsen and Susan J. Eggers and Henry M.
Levy",
title = "Retrospective: {Simultaneous} multithreading:
maximizing on-chip parallelism",
crossref = "ACM:1998:PAI",
pages = "115--116",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Goke:1998:BNP,
author = "L. Rodney Goke and G. J. Lipovski",
title = "{Banyan} networks for partitioning multiprocessor
systems",
crossref = "ACM:1998:PAI",
pages = "117--124",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dennis:1998:PAB,
author = "Jack B. Dennis and David P. Misunas",
title = "A preliminary architecture for a basic data-flow
processor",
crossref = "ACM:1998:PAI",
pages = "125--131",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Patel:1998:ITP,
author = "Janak H. Patel and Edward S. Davidson",
title = "Improving the throughput of a pipeline by insertion of
delays",
crossref = "ACM:1998:PAI",
pages = "132--137",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Bell:1998:CSW,
author = "Gordon Bell and William D. Strecker",
title = "Computer structures: what have we learned from the
{PDP-11}?",
crossref = "ACM:1998:PAI",
pages = "138--151",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Peuto:1998:ITM,
author = "Bernard L. Peuto and Leonard J. Shustek",
title = "An instruction timing model of {CPU} performance",
crossref = "ACM:1998:PAI",
pages = "152--165",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Ditzel:1998:RHL,
author = "David R. Ditzel and David A. Patterson",
title = "Retrospective on high-level language computer
architecture",
crossref = "ACM:1998:PAI",
pages = "166--173",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Batcher:1998:AMP,
author = "Kenneth E. Batcher",
title = "Architecture of a massively parallel processor",
crossref = "ACM:1998:PAI",
pages = "174--179",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Lampson:1998:PHP,
author = "Butler W. Lampson and Kenneth A. Pier",
title = "A processor for a high-performance personal computer",
crossref = "ACM:1998:PAI",
pages = "180--194",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Kroft:1998:LFI,
author = "David Kroft",
title = "Lockup-free instruction fetch\slash prefetch cache
organization",
crossref = "ACM:1998:PAI",
pages = "195--201",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:SBP,
author = "James E. Smith",
title = "A study of branch prediction strategies",
crossref = "ACM:1998:PAI",
pages = "202--215",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Patterson:1998:RRI,
author = "David A. Patterson and Carlo H. S{\'e}quin",
title = "{RISC I}: a reduced instruction set {VLSI} computer",
crossref = "ACM:1998:PAI",
pages = "216--230",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:DAE,
author = "James E. Smith",
title = "Decoupled access\slash execute computer
architectures",
crossref = "ACM:1998:PAI",
pages = "231--238",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Gottlieb:1998:NUD,
author = "Allan Gottlieb and Ralph Grishman and Clyde P. Kruskal
and Kevin P. McAuliffe and Larry Rudolph and Marc
Snir",
title = "The {NYU Ultracomputer} --- designing a {MIMD},
shared-memory parallel machine",
crossref = "ACM:1998:PAI",
pages = "239--254",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Goodman:1998:UCM,
author = "James R. Goodman",
title = "Using cache memory to reduce processor-memory
traffic",
crossref = "ACM:1998:PAI",
pages = "255--262",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Fisher:1998:VLI,
author = "Joseph A. Fisher",
title = "Very long instruction word architectures and the
{ELI-512}",
crossref = "ACM:1998:PAI",
pages = "263--273",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Emer:1998:CPP,
author = "Joel S. Emer and Douglas W. Clark",
title = "A characterization of processor performance in the
{VAX-11\slash 780}",
crossref = "ACM:1998:PAI",
pages = "274--283",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Papamarcos:1998:LOC,
author = "Mark S. Papamarcos and Janak H. Patel",
title = "A low-overhead coherence solution for multiprocessors
with private cache memories",
crossref = "ACM:1998:PAI",
pages = "284--290",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Smith:1998:IPI,
author = "James E. Smith and Andrew R. Pleszkun",
title = "Implementation of precise interrupts in pipelined
processors",
crossref = "ACM:1998:PAI",
pages = "291--299",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Hwu:1998:HHP,
author = "Wen-mei W. Hwu and Yale N. Patt",
title = "{HPSm}, a high performance restricted data flow
architecture having minimal functionality",
crossref = "ACM:1998:PAI",
pages = "300--308",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Annaratone:1998:WAI,
author = "Marco Annaratone and Emmanuel Arnould and Thomas Gross
and H. T. Kung and Monica S. Lam and Onat
Menzilcio{\u{g}}lu and Ken Sarocky and Jon A. Webb",
title = "{Warp} architecture and implementation",
crossref = "ACM:1998:PAI",
pages = "309--319",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dubois:1998:MAB,
author = "Michel Dubois and Christoph Scheurich and Faye
Briggs",
title = "Memory access buffering in multiprocessors",
crossref = "ACM:1998:PAI",
pages = "320--328",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Sohi:1998:IIL,
author = "Gurindar S. Sohi and Sriram Vajapeyam",
title = "Instruction issue logic for high-performance,
interruptible pipelined processors",
crossref = "ACM:1998:PAI",
pages = "329--336",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Dally:1998:AMD,
author = "William J. Dally and Linda Chao and Andrew Chien and
Soha Hassoun and Waldemar Horwat and Jon Kaplan and
Paul Song and Brian Totty and Scott Wills",
title = "Architecture of a message-driven processor",
crossref = "ACM:1998:PAI",
pages = "337--344",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Baer:1998:IPM,
author = "Jean-Loup Baer and Wen-Hann Wang",
title = "On the inclusion properties for multi-level cache
hierarchies",
crossref = "ACM:1998:PAI",
pages = "345--352",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Agarwal:1998:EDS,
author = "Anant Agarwal and Richard Simoni and John Hennessy and
Mark Horowitz",
title = "An evaluation of directory schemes for cache
coherence",
crossref = "ACM:1998:PAI",
pages = "353--362",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Adve:1998:WON,
author = "Sarita V. Adve and Mark D. Hill",
title = "Weak ordering --- a new definition",
crossref = "ACM:1998:PAI",
pages = "363--375",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Gharachorloo:1998:MCE,
author = "Kourosh Gharachorloo and Daniel Lenoski and James
Laudon and Phillip Gibbons and Anoop Gupta and John
Hennessy",
title = "Memory consistency and event ordering in scalable
shared-memory multiprocessors",
crossref = "ACM:1998:PAI",
pages = "376--387",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Jouppi:1998:IDM,
author = "Norman P. Jouppi",
title = "Improving direct-mapped cache performance by the
addition of a small fully-associative cache and prefetch
buffers",
crossref = "ACM:1998:PAI",
pages = "388--397",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Papadopoulos:1998:MET,
author = "Gregory M. Papadopoulos and David E. Culler",
title = "{Monsoon}: an explicit token-store architecture",
crossref = "ACM:1998:PAI",
pages = "398--407",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Chang:1998:IAF,
author = "Pohua P. Chang and Scott A. Mahlke and William Y. Chen
and Nancy J. Warter and Wen-mei W. Hwu",
title = "{IMPACT}: an architectural framework for
multiple-instruction-issue processors",
crossref = "ACM:1998:PAI",
pages = "408--417",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{Lenoski:1998:DPI,
author = "Daniel Lenoski and James Laudon and Truman Joe and
David Nakahira and Luis Stevens and Anoop Gupta and
John Hennessy",
title = "The {DASH} prototype: implementation and performance",
crossref = "ACM:1998:PAI",
pages = "418--429",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@InProceedings{vonEicken:1998:AMM,
author = "Thorsten von Eicken and David E. Culler and Seth Copen
Goldstein and Klaus Erik Schauser",
title = "Active messages: a mechanism for integrating
communication and computation",
crossref = "ACM:1998:PAI",
pages = "430--440",
year = "1998",
bibdate = "Fri May 12 17:56:30 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
acknowledgement = ack-nhfb,
remark = "25 years of the International Symposia on Computer
Architecture (selected papers).",
}
@inproceedings{Glass:1998:TMA,
  author = {Christopher J. Glass and Lionel M. Ni},
  title = {The turn model for adaptive routing},
  crossref = {ACM:1998:PAI},
  pages = {441--450},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Yeh:1998:AIT,
  author = {Tse-Yu Yeh and Yale N. Patt},
  title = {Alternative implementations of two-level adaptive
    branch prediction},
  crossref = {ACM:1998:PAI},
  pages = {451--461},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Kuck:1998:CSI,
  author = {D. Kuck and E. Davidson and D. Lawrie and A. Sameh and
    C.-Q. Zhu and A. Veidenbaum and J. Konicek and P. Yew
    and K. Gallivan and W. Jalby and H. Wijshoff and R.
    Bramley and U. M. Yang and P. Emrath and D. Padua and
    R. Eigenmann and J. Hoeflinger and G. Jayson and Z. Li
    and T. Murphy and J. Andrews},
  title = {The {Cedar} system and an initial performance study},
  crossref = {ACM:1998:PAI},
  pages = {462--472},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Blumrich:1998:VMM,
  author = {Matthias A. Blumrich and Kai Li and Richard Alpert and
    Cezary Dubnicki and Edward W. Felten and Jonathan
    Sandberg},
  title = {Virtual memory mapped network interface for the
    {SHRIMP} multicomputer},
  crossref = {ACM:1998:PAI},
  pages = {473--484},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Kuskin:1998:SFM,
  author = {Jeffrey Kuskin and David Ofelt and Mark Heinrich and
    John Heinlein and Richard Simoni and K. Gharachorloo
    and J. Chapin and D. Nakahira and J. Baxter and M.
    Horowitz and A. Gupta and M. Rosenblum and J.
    Hennessy},
  title = {The {Stanford FLASH} multiprocessor},
  crossref = {ACM:1998:PAI},
  pages = {485--496},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Reinhardt:1998:TTU,
  author = {Steven K. Reinhardt and James R. Larus and David A.
    Wood},
  title = {{Tempest} and {Typhoon}: user-level shared memory},
  crossref = {ACM:1998:PAI},
  pages = {497--508},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Agarwal:1998:AMA,
  author = {Anant Agarwal and Ricardo Bianchini and David Chaiken
    and Kirk L. Johnson and David Kranz and J. Kubiatowicz
    and B.-H. Lim and K. Mackenzie and D. Yeung},
  title = {The {MIT Alewife} machine: architecture and
    performance},
  crossref = {ACM:1998:PAI},
  pages = {509--520},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Sohi:1998:MP,
  author = {Gurindar S. Sohi and Scott E. Breach and T. N.
    Vijaykumar},
  title = {Multiscalar processors},
  crossref = {ACM:1998:PAI},
  pages = {521--532},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@inproceedings{Tullsen:1998:SMM,
  author = {Dean M. Tullsen and Susan J. Eggers and Henry M.
    Levy},
  title = {Simultaneous multithreading: maximizing on-chip
    parallelism},
  crossref = {ACM:1998:PAI},
  pages = {533--544},
  year = {1998},
  bibdate = {Fri May 12 17:56:30 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  acknowledgement = ack-nhfb,
  remark = {25 years of the International Symposia on Computer
    Architecture (selected papers).},
}
@proceedings{Lipovski:1973:PFA,
  editor = {G. Jack Lipovski and Stephen Anthony Szygenda},
  booktitle = {{Proceedings of the First Annual Symposium on Computer
    Architecture, December 9--11, 1973, University of
    Florida, Gainesville, Florida}},
  title = {{Proceedings of the First Annual Symposium on Computer
    Architecture, December 9--11, 1973, University of
    Florida, Gainesville, Florida}},
  volume = {2(4)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {iv + 277},
  year = {1973},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {TK7885.A1},
  bibdate = {Fri May 12 14:36:31 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {IEEE catalog no. 73CH0824-3C.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800123},
  acknowledgement = ack-nhfb,
}
@proceedings{King:1975:CPA,
  editor = {Willis K. King},
  booktitle = {{Conference Proceedings: 2nd Annual Symposium on
    Computer Architecture, Houston, Texas, January 20--22,
    1975}},
  title = {{Conference Proceedings: 2nd Annual Symposium on
    Computer Architecture, Houston, Texas, January 20--22,
    1975}},
  volume = {3(4)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {vi + 231},
  year = {1975},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {????},
  bibdate = {Fri May 12 14:27:32 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=642089},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1976:CPA,
  editor = {{IEEE}},
  booktitle = {{Conference Proceedings: 3rd Annual Symposium on
    Computer Architecture, Clearwater, Florida, January
    19--21, 1976}},
  title = {{Conference Proceedings: 3rd Annual Symposium on
    Computer Architecture, Clearwater, Florida, January
    19--21, 1976}},
  volume = {??(??)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {????},
  year = {1976},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {????},
  bibdate = {Fri May 12 14:20:44 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {IEEE no. 75CH1043-5C.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800110},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1977:CPA,
  editor = {{IEEE}},
  booktitle = {{Conference Proceedings: 4th Annual Symposium on
    Computer Architecture, Silver Spring, Maryland, March
    23--25, 1977}},
  title = {{Conference Proceedings: 4th Annual Symposium on
    Computer Architecture, Silver Spring, Maryland, March
    23--25, 1977}},
  volume = {??(??)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {ix + 438},
  year = {1977},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {QA76.9.A73 S97 1977},
  bibdate = {Fri May 12 14:22:57 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {IEEE no. 77 CH1182-5C.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800255},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1979:CPA,
  editor = {{IEEE}},
  booktitle = {{Conference Proceedings: 5th Annual Symposium on
    Computer Architecture, Palo Alto, California, April
    23--25, 1979}},
  title = {{Conference Proceedings: 5th Annual Symposium on
    Computer Architecture, Palo Alto, California, April
    23--25, 1979}},
  volume = {6(7)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {????},
  year = {1979},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {????},
  bibdate = {Fri May 12 14:22:57 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800094},
  acknowledgement = ack-nhfb,
}
@proceedings{ACM:1980:CPA,
  editor = {{ACM}},
  booktitle = {{Conference Proceedings: 7th Annual Symposium on
    Computer Architecture, La Baule, France, 6--8 May
    1980}},
  title = {{Conference Proceedings: 7th Annual Symposium on
    Computer Architecture, La Baule, France, 6--8 May
    1980}},
  volume = {8(3)},
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = {333},
  year = {1980},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  bibdate = {Fri Sep 16 10:53:10 1994},
  bibsource = {ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Math/fparith.bib;
    http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800090},
  acknowledgement = ack-nj,
}
@proceedings{IEEE:1981:CPA,
  editor = {{IEEE}},
  booktitle = {{Conference Proceedings: 8th Annual Symposium on
    Computer Architecture, Minneapolis, Minnesota, May
    12--14, 1981}},
  title = {{Conference Proceedings: 8th Annual Symposium on
    Computer Architecture, Minneapolis, Minnesota, May
    12--14, 1981}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {????},
  year = {1981},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {????},
  bibdate = {Fri May 12 14:25:51 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800052},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1982:CPA,
  editor = {{IEEE}},
  booktitle = {{Conference proceedings: the 9th annual Symposium on
    Computer Architecture: April 26--29, 1982, Austin,
    Texas}},
  title = {{Conference proceedings: the 9th annual Symposium on
    Computer Architecture: April 26--29, 1982, Austin,
    Texas}},
  volume = {10(3)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {viii + 335},
  year = {1982},
  coden = {CANED2, CPAADU},
  isbn = {????},
  isbn-13 = {????},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE), 0149-7111},
  lccn = {QA76.9.A73 S97 1982},
  bibdate = {Fri May 12 14:17:17 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order no. 415820. IEEE catalogue no. 82CH1754-1.
    IEEE Computer Society order no. 411.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800048},
  acknowledgement = ack-nhfb,
}
@Proceedings{IEEE:1983:CPA,
editor = "{IEEE}",
booktitle = "{Conference proceedings: the 10th annual International
Symposium on Computer Architecture, Royal Institute of
Technology, Stockholm, Sweden}",
title = "{Conference proceedings: the 10th annual International
Symposium on Computer Architecture, Royal Institute of
Technology, Stockholm, Sweden}",
volume = "11(3)",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "ix + 438",
year = "1983",
CODEN = "CANED2",
ISBN = "0-89791-101-6",
ISBN-13 = "978-0-89791-101-6",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 .S97 1983",
bibdate = "Fri May 12 13:53:44 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ACM order number 415830. IEEE catalog no. 83CH1889-5.
IEEE Computer Society order no. 473.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=800046",
acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1984:AIS,
  editor = {{IEEE}},
  booktitle = {{The 11th Annual International Symposium on Computer
    Architecture, June 5--7, 1984, Ann Arbor, Michigan
    conference proceedings}},
  title = {{The 11th Annual International Symposium on Computer
    Architecture, June 5--7, 1984, Ann Arbor, Michigan
    conference proceedings}},
  volume = {12(3)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {ix + 373},
  year = {1984},
  coden = {CANED2},
  isbn = {0-8186-0538-3 (paperback)},
  isbn-13 = {978-0-8186-0538-3 (paperback)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 S97 1984},
  bibdate = {Fri May 12 14:30:24 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order no. 415840. IEEE catalog no. 84CH2051-1.
    IEEE Computer Society no. 538.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=800015},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1985:AIS,
  editor = {{IEEE}},
  booktitle = {{The 12th Annual International Symposium on Computer
    Architecture, June 17--19, 1985, Boston, Massachusetts:
    conference proceedings}},
  title = {{The 12th Annual International Symposium on Computer
    Architecture, June 17--19, 1985, Boston, Massachusetts:
    conference proceedings}},
  volume = {13(3)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xiv + 428},
  year = {1985},
  coden = {CANED2},
  isbn = {0-8186-0634-7},
  isbn-13 = {978-0-8186-0634-2},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 C65},
  bibdate = {Fri May 12 13:47:45 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order no. 415850. IEEE catalog no. 85CH2144-4.
    IEEE Computer Society order no. 634.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=327010},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1986:CPT,
  editor = {{IEEE}},
  booktitle = {{Conference proceedings: the thirteenth annual
    International symposium on computer Architecture, June
    2--5, 1986, Tokyo, Japan}},
  title = {{Conference proceedings: the thirteenth annual
    International symposium on computer Architecture, June
    2--5, 1986, Tokyo, Japan}},
  volume = {14(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xiii + 454},
  year = {1986},
  coden = {CANED2},
  isbn = {0-8186-8719-3},
  isbn-13 = {978-0-8186-8719-8},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 I56 1986},
  bibdate = {Fri May 12 13:51:08 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order number 415860. IEEE catalogue number
    86CH12291-3. IEEE Computer society order number 719.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=17407},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1987:AIS,
  editor = {{IEEE}},
  booktitle = {{The 14th Annual International Symposium on Computer
    Architecture, June 2--5, 1987, Pittsburgh,
    Pennsylvania: Conference proceedings}},
  title = {{The 14th Annual International Symposium on Computer
    Architecture, June 2--5, 1987, Pittsburgh,
    Pennsylvania: Conference proceedings}},
  volume = {15(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xi + 321},
  year = {1987},
  coden = {CANED2},
  isbn = {0-8186-8776-2 (casebound), 0-8186-0776-9 (paperback),
    0-8186-0776-9 (microfiche), 0-8186-4776-0 (casebound)},
  isbn-13 = {978-0-8186-8776-1 (casebound), 978-0-8186-0776-9
    (paperback), 978-0-8186-0776-9 (microfiche),
    978-0-8186-4776-5 (casebound)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 I56 1987},
  bibdate = {Fri May 12 14:07:52 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM Order No. 415870.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=30350},
  acknowledgement = ack-nhfb,
}
@proceedings{IEEE:1988:AIS,
  editor = {{IEEE}},
  booktitle = {{The 15th Annual International Symposium on Computer
    Architecture: Conference proceedings, May 30--June 2,
    1988, Honolulu, Hawaii}},
  title = {{The 15th Annual International Symposium on Computer
    Architecture: Conference proceedings, May 30--June 2,
    1988, Honolulu, Hawaii}},
  volume = {16(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xi + 461},
  year = {1988},
  coden = {CANED2},
  isbn = {0-8186-0861-7 (paperback), 0-8186-4861-9 (microfiche),
    0-8186-8861-0 (case)},
  isbn-13 = {978-0-8186-0861-2 (paperback), 978-0-8186-4861-8
    (microfiche), 978-0-8186-8861-4 (case)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 C65},
  bibdate = {Fri May 12 14:09:39 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order no. 415880.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=52400},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:1989:PAI,
editor = "{ACM}",
booktitle = "{Proceedings of the 16th annual International
Symposium on Computer Architecture, May 28--June 1,
1989, Jerusalem, Israel}",
title = "{Proceedings of the 16th annual International
Symposium on Computer Architecture, May 28--June 1,
1989, Jerusalem, Israel}",
volume = "17(3)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xvii + 426",
year = "1989",
CODEN = "CANED2",
ISBN = "0-89791-319-1, 0-8186-5948-3 (microfiche),
0-8186-8948-X (casebound), 0-8186-1948-1 (paperback)",
ISBN-13 = "978-0-89791-319-5, 978-0-8186-5948-5 (microfiche),
978-0-8186-8948-2 (casebound), 978-0-8186-1948-9
(paperback)",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 C65",
bibdate = "Fri May 12 13:42:34 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ACM order number 415890. IEEE catalog number
89CH2705-2. IEEE Computer Society order number 1948.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=74925",
acknowledgement = ack-nhfb,
remark = "ISCA '89 Proceedings",
}
@proceedings{IEEE:1990:PAI,
  editor = {{IEEE}},
  booktitle = {{Proceedings: the 17th annual International Symposium
    on Computer Architecture, May 28--31, 1990, Seattle,
    Washington}},
  title = {{Proceedings: the 17th annual International Symposium
    on Computer Architecture, May 28--31, 1990, Seattle,
    Washington}},
  volume = {18(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xv + 378},
  year = {1990},
  coden = {CANED2},
  isbn = {0-8186-9047-X (casebound), 0-89791-366-3,
    0-8186-2047-1 (paperback), 0-8186-6047-3 (microfiche)},
  isbn-13 = {978-0-8186-9047-1 (casebound), 978-0-89791-366-9,
    978-0-8186-2047-8 (paperback), 978-0-8186-6047-4
    (microfiche)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 I56 1990},
  bibdate = {Fri May 12 14:04:34 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order no. 415900.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=325164},
  acknowledgement = ack-nhfb,
  remark = {ISCA '17 Proceedings},
}
@Proceedings{ACM:1991:PIS,
editor = "{ACM}",
booktitle = "{Proceedings of the 18th International Symposium on
Computer Architecture: May 27--30, 1991, Toronto,
Canada}",
title = "{Proceedings of the 18th International Symposium on
Computer Architecture: May 27--30, 1991, Toronto,
Canada}",
volume = "19(3)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xv + 399",
year = "1991",
CODEN = "CANED2",
ISBN = "0-89791-394-9",
ISBN-13 = "978-0-89791-394-2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 I56 1991",
bibsource = "ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Os/IMMD_IV.bib;
http://portal.acm.org/;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ACM order number 415910. IEEE catalog number
91CH2995-9. IEEE Computer Society order number 2146.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=115952",
acknowledgement = ack-nhfb,
remark = "ISCA '18 Proceedings",
}
@proceedings{IEEE:1992:PAI,
  editor = {{IEEE}},
  booktitle = {{Proceedings, the 19th annual International Symposium
    on Computer Architecture: May 19--21, 1992, Gold Coast,
    Queensland, Australia}},
  title = {{Proceedings, the 19th annual International Symposium
    on Computer Architecture: May 19--21, 1992, Gold Coast,
    Queensland, Australia}},
  volume = {20(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xvi + 439},
  year = {1992},
  coden = {CANED2},
  isbn = {0-89791-509-7 (soft cover), 0-8186-2940-1 (perfect
    bound), 0-8186-2942-8 (casebound), 0-8186-2941-X
    (microfiche)},
  isbn-13 = {978-0-89791-509-0 (soft cover), 978-0-8186-2940-2
    (perfect bound), 978-0-8186-2942-6 (casebound),
    978-0-8186-2941-9 (microfiche)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 I56 1992},
  bibdate = {Fri May 12 13:59:17 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order number 415920. IEEE catalog number
    92CH3156-7. IEEE Computer Society order number 2940.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=139669},
  acknowledgement = ack-nhfb,
  remark = {ISCA '19 Proceedings},
}
@proceedings{ACM:1993:AIS,
  editor = {{ACM}},
  booktitle = {{20th Annual International Symposium on Computer
    Architecture ISCA '20, San Diego, CA, USA, May 16--19,
    1993}},
  title = {{20th Annual International Symposium on Computer
    Architecture ISCA '20, San Diego, CA, USA, May 16--19,
    1993}},
  journal = j-COMP-ARCH-NEWS,
  volume = {21(2)},
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = {xii + 361},
  month = may,
  year = {1993},
  coden = {CANED2},
  isbn = {0-8186-3810-9 (paper), 0-8186-3811-7 (microfiche),
    0-8186-3812-5 (case)},
  isbn-13 = {978-0-8186-3810-7 (paper), 978-0-8186-3811-4
    (microfiche), 978-0-8186-3812-1 (case)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 I58 1993},
  bibdate = {Sat Sep 28 19:27:02 MDT 1996},
  bibsource = {ftp://ftp.math.utah.edu/pub/tex/bib/mach.bib;
    http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {ACM order number 415930. IEEE catalog number
    93CH3284-7. IEEE Computer Society Press order number
    3810-02.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=165123},
  acknowledgement = ack-nhfb,
  confsponsor = {IEEE; ACM},
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {https://dl.acm.org/loi/sigarch},
}
@proceedings{IEEE:1994:PAI,
  editor = {{IEEE}},
  booktitle = {{Proceedings: the 21st Annual International Symposium
    on Computer Architecture, April 18--21, 1994, Chicago,
    Illinois}},
  title = {{Proceedings: the 21st Annual International Symposium
    on Computer Architecture, April 18--21, 1994, Chicago,
    Illinois}},
  volume = {22(2)},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xii + 394},
  year = {1994},
  coden = {CANED2},
  isbn = {0-8186-5510-0 (paper), 0-8186-5511-9 (microfiche),
    0-8186-5512-7 (casebound)},
  isbn-13 = {978-0-8186-5510-4 (paper), 978-0-8186-5511-1
    (microfiche), 978-0-8186-5512-8 (casebound)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 S97 1994},
  bibdate = {Fri May 12 13:45:19 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=191995},
  acknowledgement = ack-nhfb,
  remark = {ISCA '21 Proceedings},
}
@Proceedings{ACM:1995:PAI,
editor = "{ACM}",
booktitle = "{Proceedings, the 22nd Annual International Symposium
on Computer Architecture: June 22--24, 1995, Santa
Margherita Ligure, Italy}",
title = "{Proceedings, the 22nd Annual International Symposium
on Computer Architecture: June 22--24, 1995, Santa
Margherita Ligure, Italy}",
volume = "23(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xiii + 426",
year = "1995",
CODEN = "CANED2",
ISBN = "0-89791-698-0",
ISBN-13 = "978-0-89791-698-1",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 I56 1995",
bibdate = "Fri May 12 13:37:23 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ACM order number 415950. IEEE catalog number 95CS35801.
IEEE Computer Society order number PRO7677.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=223982",
acknowledgement = ack-nhfb,
remark = "ISCA '22",
}
@Proceedings{ACM:1996:PAI,
editor = "{ACM}",
booktitle = "{Proceedings: the 23rd Annual International Symposium
on Computer Architecture, May 22--24, 1996,
Philadelphia, Pennsylvania}",
title = "{Proceedings: the 23rd Annual International Symposium
on Computer Architecture, May 22--24, 1996,
Philadelphia, Pennsylvania}",
volume = "24(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xii + 318",
year = "1996",
CODEN = "CANED2",
ISBN = "0-89791-786-3",
ISBN-13 = "978-0-89791-786-5",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 S97 1996",
bibdate = "Fri May 12 12:36:04 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.bibsys.no:2100/BIBSYS",
note = "ACM order number 415960.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=232973",
acknowledgement = ack-nhfb,
remark = "ISCA '23 proceedings; FCRC '96.",
}
@proceedings{ACM:1997:AIS,
  editor = {{ACM}},
  booktitle = {{The 24th Annual International Symposium on Computer
    Architecture, June 2--4, 1997, Denver, Colorado:
    conference proceedings}},
  title = {{The 24th Annual International Symposium on Computer
    Architecture, June 2--4, 1997, Denver, Colorado:
    conference proceedings}},
  volume = {25(2)},
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = {vii + 350},
  year = {1997},
  coden = {CANED2},
  isbn = {0-89791-901-7},
  isbn-13 = {978-0-89791-901-2},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73 S94 1997},
  bibdate = {Fri May 12 12:36:26 MDT 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
    z3950.bibsys.no:2100/BIBSYS},
  note = {ACM order number 415974.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=264107},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:1998:PAI,
editor = "{ACM}",
booktitle = "{Proceedings: the 25th Annual International Symposium
on Computer Architecture, June 27--July 1, 1998,
Barcelona, Spain}",
title = "{Proceedings: the 25th Annual International Symposium
on Computer Architecture, June 27--July 1, 1998,
Barcelona, Spain}",
volume = "26(3)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xiii + 394",
year = "1998",
CODEN = "CANED2",
ISBN = "0-8186-8491-7, 0-8186-8492-5, 0-8186-8493-3",
ISBN-13 = "978-0-8186-8491-3, 978-0-8186-8492-0,
978-0-8186-8493-7",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 S97 1998",
bibdate = "Fri May 12 12:36:10 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.bibsys.no:2100/BIBSYS",
note = "ACM Order Number 414984. IEEE Computer Society Order
Number PR08491; IEEE Order Plan Catalog Number
98CB36235.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=279358;
http://portal.acm.org/toc.cfm?id=285930",
acknowledgement = ack-nhfb,
remark = "ISCA '25 proceedings.",
}
@proceedings{IEEE:1999:PIS,
  editor = {{IEEE}},
  booktitle = {{Proceedings of the 26th International Symposium on
    Computer Architecture: May 2--4, 1999, Atlanta,
    Georgia}},
  title = {{Proceedings of the 26th International Symposium on
    Computer Architecture: May 2--4, 1999, Atlanta,
    Georgia}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xii + 317},
  year = {1999},
  coden = {CANED2},
  isbn = {0-7695-0170-2, 0-7695-0171-0 (casebound)},
  isbn-13 = {978-0-7695-0170-3, 978-0-7695-0171-0 (casebound)},
  issn = {0163-5964 (ACM), 0884-7495 (IEEE)},
  issn-l = {0163-5964},
  lccn = {QA76.9.A73. S9 1999},
  bibdate = {Fri May 12 13:33:37 2006},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  note = {IEEE Computer Society Order Number PR00170. IEEE Order
    Plan Catalog Number 98CB36367.},
  series = j-COMP-ARCH-NEWS,
  url = {http://portal.acm.org/toc.cfm?id=300979},
  acknowledgement = ack-nhfb,
  remark = {ISCA '99 proceedings},
}
@Proceedings{ACM:2000:PIS,
editor = "{ACM}",
booktitle = "{Proceedings of the 27th International Symposium on
Computer Architecture, June 12--14, 2000, Vancouver,
British Columbia, Canada}",
title = "{Proceedings of the 27th International Symposium on
Computer Architecture, June 12--14, 2000, Vancouver,
British Columbia, Canada}",
volume = "28(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "vi + 327",
year = "2000",
CODEN = "CANED2",
ISBN = "1-58113-232-8",
ISBN-13 = "978-1-58113-232-8",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 S97 2000",
bibdate = "Fri May 12 12:35:59 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.bibsys.no:2100/BIBSYS",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=339647",
acknowledgement = ack-nhfb,
remark = "ISCA '27 proceedings.",
}
%%% Edited anthology of reprinted computer-architecture papers.  The
%%% tableofcontents field lists every reprinted paper under its chapter;
%%% Chapter 7 is further divided by the subsection headings Storage
%%% Systems, Networks, and Graphics.
@Book{Hill:2000:RCA,
editor = "Mark D. (Mark Donald) Hill and Norman P. (Norman Paul)
Jouppi and Gurindar Sohi",
booktitle = "Readings in Computer Architecture",
title = "Readings in Computer Architecture",
publisher = pub-MORGAN-KAUFMANN,
address = pub-MORGAN-KAUFMANN:adrsf,
pages = "xviii + 717",
year = "2000",
ISBN = "1-55860-539-8",
ISBN-13 = "978-1-55860-539-8",
LCCN = "QA76.9.A73 H55 2000",
bibdate = "Fri May 12 15:34:46 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/bibnet/authors/w/wilkes-maurice-v.bib;
https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.loc.gov:7090/Voyager",
URL = "http://books.elsevier.com/bookscat/links/details.asp?isbn=1558605398;
http://www.loc.gov/catdir/description/els033/99044480.html;
http://www.loc.gov/catdir/toc/els033/99044480.html;
https://archive.org/details/readingsincomput0000hill/page/n9/mode/2up?q=Slave+memories;
https://shop.elsevier.com/books/readings-in-computer-architecture/hill/978-0-08-057364-9",
acknowledgement = ack-nhfb,
shorttableofcontents = "1: Classic Machines: Technology,
Implementation, and Economics \\
2: Methods \\
3: Instruction Sets \\
4: Instruction Level Parallelism (ILP) \\
5: Dataflow and Multithreading \\
6: Memory Systems \\
7: I/O: Storage Systems, Networks, and Graphics \\
8: Single-Instruction Multiple Data (SIMD) Parallelism
\\
9: Multiprocessors and Multicomputers \\
10: Recent Implementations and Future Prospects",
subject = "Computer architecture",
tableofcontents = "PREFACE \\
CHAPTER 1: Classic Machines: Technology,
Implementation, and Economics \\
G. M. Amdahl, G. A. Blaauw, F. P. Brooks, Jr.,
``Architecture of the IBM System/360,'' IBM Journal of
Research and Development, April 1964 \\
J. E. Thornton, ``Parallel Operation in the Control
Data 6600,'' Fall Joint Computers Conference, vol. 26,
pp. 33--40, 1964 \\
R. M. Russell, ``The Cray-1 Computer System'', Comm.
ACM, 21, 1 (January 1978), 63--72 \\
J. Kolodzey, ``Cray-1 Computer Technology'', IEEE
Transactions on Components, Hybrids, and Manufacturing
Technology, p181--187, June 1981 \\
G. Moore, ``Cramming More Components onto Integrated
Circuits'', Electronics, p114--117, April 1965 \\
S. Mazor, ``The History of the Microcomputer Invention
and Evolution'', Proc. IEEE Dec '95, 1601--1607 \\
CHAPTER 2: Methods \\
G. M. Amdahl, ``Validity of the Single-Processor
Approach to Achieving Large Scale Computing
Capabilities'', AFIPS Conference Proceedings, (April
1967), 483--485 \\
M. D. Hill and A. J. Smith, ``Evaluating Associativity
in CPU Caches'', IEEE Trans. on Computers, C-38, 12
(December 1989), 1612--1630 \\
J. S. Emer and D. W. Clark, ``A Characterization of
Processor Performance in the VAX-11/780'', Proc.
Eleventh International Symposium on Computer
Architecture, Ann Arbor, MI (June 1984), 301--310 \\
CHAPTER 3: Instruction Sets \\
W. A. Wulf, ``Compilers and Computer Architecture'',
IEEE Computer, 14, 7 (July 1981), 41--48 \\
G. Radin, ``The 801 Minicomputer,'' Proc. Symposium on
Architectural Support for Programming Languages and
Operating Systems, March 1982, 39--47 \\
D. A. Patterson and D. R. Ditzel, ``The Case for the
Reduced Instruction Set Computer,'' ACM Computer
Architecture News, 8, 6, 15 October 1980, 25--33 \\
R. P. Colwell, C. Y. Hitchcock, E. D. Jensen, H. M.
Brinkley Sprunt, C. P. Kollar, ``Computers, Complexity,
and Controversy,'' IEEE Computer, vol. 18, no. 9,
September 1985 \\
J. Crawford, ``Architecture of the Intel 80386,''
Proceedings of ICCD, pp. 155--160, October 1986 \\
S. Mahlke, R. Hank, J. Mccormick, D. August, W. Hwu,
``A Comparison of Full and Partial Predicated Execution
Support for ILP Processors'', Proc. 22nd Annual
Symposium on Computer Architecture (June 1995),
138--150 \\
CHAPTER 4: Instruction Level Parallelism (ILP) \\
D. W. Anderson, F. J. Sparacio and R. M. Tomasulo,
``The IBM System/360 Model 91: Machine Philosophy and
Instruction-Handling'', IBM Journal of Research and
Development January 1967 \\
J. E. Smith and A. R. Pleszkun, ``Implementing Precise
Interrupts in Pipelined Processors'', IEEE Trans. on
Computers, C-37, 5 (May 1988), 562--573 \\
J. E. Smith, ``A Study of Branch Prediction
Strategies'', Proc. Eighth Annual Symposium on Computer
Architecture (May 1981), 135--148 \\
T.-Y. Yeh and Y. N. Patt, ``Two-Level Adaptive Branch
Prediction,'' Proc. 24th Annual Workshop on
Microprogramming (MICRO-24), Albuquerque, NM, (December
1991) \\
Y. N. Patt, W. W. Hwu and M. Shebanow, ``HPS, A New
Microarchitecture: Introduction and Rationale,'' Proc.
18th Annual Workshop on Microprogramming, Pacific
Grove, CA (December 1985), 103--108 \\
G. S. Sohi and S. Vajapeyam, ``Instruction Issue Logic
for High-Performance, Interruptible Pipelined
Processors'', Proc. 14th Annual Symposium on Computer
Architecture (June 1987), 27--34 \\
G. F. Grohoski, ``Machine Organization of the IBM RISC
System/6000 processor,'' IBM Journal of Research and
Development, 34, 1 (January 1990), 37--58 \\
K. C. Yeager, ``The MIPS R10000 Superscalar
Microprocessor'', IEEE Micro, 16, 2, April 1996, 28--40
\\
B. R. Rau and J. A. Fisher, ``Instruction-Level
Parallel Processing: History, Overview, and
Perspective'', The Journal of Supercomputing, 7, 1,
(??? 1993), 9--50. Reprinted in Rau and Fisher (ed.),
``Instruction-Level Parallelism'', Kluwer Academic
Publishers, 1993 \\
CHAPTER 5: Dataflow and Multithreading \\
J. B. Dennis and D. P. Misunas, ``A Preliminary
Architecture for a Basic Data-Flow Processor,'' Proc.
2nd Annual Symposium on Computer Architecture, Computer
Architecture News, 3, 4 (December 1974), 126--132, ACM
\\
Arvind and R. S. Nikhil, ``Executing a Program on the
MIT Tagged-Token Dataflow Architecture'', IEEE Trans.
on Computers, 39, 3 (March 1990), 300--318 \\
B. Smith, ``Architecture and Applications of the HEP
Multiprocessor Computer System'', Proc. of the Int.
Soc. for Opt. Engr. (1981), 241--248 \\
D. M. Tullsen, S. J. Eggers, J. S. Emer, H. M. Levy, J.
L. Lo and R. L. Stamm, ``Exploiting Choice: Instruction
Fetch and Issue on an Implementable Simultaneous
Multithreading Processor'', Proc. 23rd Annual Symposium
on Computer Architecture (May 1996), 191--202 \\
CHAPTER 6: Memory Systems \\
M. V. Wilkes, ``Slave Memories and Dynamic Storage
Allocation'', IEEE Trans. on Electronic Computers,
EC-14, 2 (April 1965), 270--271 \\
J. S. Liptay, ``Structural Aspects of the System/360
Model 85, Part II: The Cache'', IBM Systems Journal,
7, 1 (1968), 15--21 \\
D. Kroft, ``Lockup-Free Instruction Fetch/Prefetch
Cache Organization'', Proc. Eighth Symposium on
Computer Architecture (May 1981), 81--87 \\
J. R. Goodman, ``Using Cache Memory to Reduce
Processor-Memory Traffic'', Proc. Tenth International
Symposium on Computer Architecture, Stockholm, Sweden
(June 1983), 124--131 \\
N. P. Jouppi, ``Improving Direct-Mapped Cache
Performance by the Addition of a Small
Fully-Associative Cache and Prefetch Buffers'', Proc.
17th Annual Symposium on Computer Architecture,
Computer Architecture News, 18, 2 (June 1990),
364--373, ACM \\
T. Kilburn, D. B. G. Edwards, M. J. Lanigan, F. H.
Sumner, ``One-Level Storage System'', IRE Transactions,
EC-11, 2, (April 1962), 223--235 \\
D. W. Clark and J. S. Emer, ``Performance of the
VAX-11/780 Translation Buffer: Simulation and
Measurement'', ACM Trans. on Computer Systems, 3, 1
(February 1985), 31--62 \\
W. Wang, J.-L. Baer and H. M. Levy, ``Organization and
Performance of a Two-Level Virtual-Real Cache
Hierarchy'', Proc. 16th Annual International Symposium
on Computer Architecture, Jerusalem (June 1989),
140--148 \\
CHAPTER 7: I/O: Storage Systems, Networks, and Graphics
\\
M. Smotherman, ``A Sequencing-based Taxonomy of I/O
Systems and Review of Historical Machines'', ACM
Computer Architecture News 17:5, (September 1989), pgs
5--15 \\
Storage Systems \\
C. Ruemmler and J. Wilkes, ``An Introduction to Disk
Drive Modeling'', IEEE Computer vol 27 \#3, March 1994,
pgs 17--28 \\
D. A. Patterson, G. Gibson and R. H. Katz, ``A Case for
Redundant Arrays of Inexpensive Disks (RAID)'', Proc.
ACM SIGMOD Conference, Chicago, Illinois (June 1988)
\\
Networks \\
R. Metcalfe and D. Boggs, ``Ethernet: Distributed
Packet Switching for Local Computer Networks.''
Communications of the ACM, 19(7):395--404 \\
L. Ni and P. McKinley, ``A Survey of Wormhole Routing
Techniques in Direct Networks'', IEEE Computer,
February 1993, vol 26 \#2, pgs 62--76 \\
Graphics \\
K. Akeley, ``Reality Engine Graphics'', SIGGRAPH '93
Proceedings, pp 109--116 \\
CHAPTER 8: Single-Instruction Multiple Data (SIMD)
Parallelism \\
M. J. Flynn, ``Very High-Speed Computing Systems'',
Proceedings of the IEEE, vol. 54, no. 12, December
1966 \\
D. J. Kuck and R. A. Stokes, ``The Burroughs Scientific
Processor (BSP)'', IEEE Trans. on Computers, vol.
C-31, pp. 363--376, May 1982 \\
M. Gokhale, B. Holmes, K. Iobst, ``Processing in
Memory: The Terasys Massively Parallel PIM Array'',
IEEE Computer, 28, 4 (April 1995), 23--31 \\
CHAPTER 9: Multiprocessors and Multicomputers \\
W. A. Wulf and S. P. Harbison, ``Reflections in a pool
of processors / An experience report on C.mmp/Hydra'',
Proc. National Computer Conference (AFIPS) (June 1978)
\\
L. Lamport, ``How to Make a Multiprocessor Computer
That Correctly Executes Multiprocess Programs'', IEEE
Trans. on Computers, C-28, 9 (September 1979), 690--691
\\
L. M. Censier and P. Feautrier, ``A New Solution to
Coherence Problems in Multicache Systems'', IEEE
Transactions on Computers, C-27, 12 (December 1978),
1112--1118 \\
D. Lenoski, J. Laudon, K. Gharachorloo, W. Weber, A.
Gupta, J. Hennessy, M. Horowitz and M. Lam, ``The
Stanford DASH Multiprocessor'', IEEE Computer, 25, 3
(March 1992), 63--79 \\
E. Hagersten, A. Landin, and S. Haridi, ``DDM--A
Cache-Only Memory Architecture'', IEEE Computer, 25, 9
(September 1992), 44--54 \\
C. L. Seitz, ``The Cosmic Cube'', Comm. ACM (January
1985), 22--33 \\
K. Li and P. Hudak, ``Memory Coherence in Shared
Virtual Memory Systems'', ACM Trans. on Computer
Systems, 7, 4 (November 1989), 321--359 \\
CHAPTER 10: Recent Implementations and Future Prospects
\\
D. Alpert, D. Avnon, ``Architecture of the Pentium
Microprocessor'', IEEE Micro, June '93, 11--21 \\
D. Papworth, ``Tuning the Pentium Pro Micro
Architecture'', IEEE Micro April '96, 8--15 \\
M. Slater, ``The Microprocessor Today'', IEEE Micro Dec
'96, 32--44 \\
A. Yu, ``The Future of Microprocessors'', IEEE Micro
Dec '96, 46--53.",
}
%%% Proceedings volume of the 28th ISCA (Goeteborg, Sweden, June 30--July 4,
%%% 2001), issued as Computer Architecture News 29(2).
@Proceedings{ACM:2001:PIS,
editor = "{ACM}",
booktitle = "{Proceedings of the 28th International Symposium on
Computer Architecture, June 30--July 4, 2001,
G{\"o}teborg, Sweden}",
title = "{Proceedings of the 28th International Symposium on
Computer Architecture, June 30--July 4, 2001,
G{\"o}teborg, Sweden}",
volume = "29(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xi + 289",
year = "2001",
CODEN = "CANED2",
ISBN = "0-7695-1162-7, 0-7695-1163-5, 0-7695-1164-3",
ISBN-13 = "978-0-7695-1162-7, 978-0-7695-1163-4,
978-0-7695-1164-1",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 C64 2001",
bibdate = "Fri May 12 12:36:32 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.bibsys.no:2100/BIBSYS",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=379240",
acknowledgement = ack-nhfb,
remark = "ISCA '01 proceedings.",
}
%%% Proceedings volume of the 29th ISCA (Anchorage, Alaska, May 25--29,
%%% 2002), issued as Computer Architecture News 30(2).
@Proceedings{ACM:2002:PIS,
editor = "{ACM}",
booktitle = "{Proceedings of the 29th International Symposium on
Computer Architecture, May 25--29, 2002, Anchorage,
Alaska}",
title = "{Proceedings of the 29th International Symposium on
Computer Architecture, May 25--29, 2002, Anchorage,
Alaska}",
volume = "30(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xv + 331",
year = "2002",
CODEN = "CANED2",
ISBN = "0-7695-1605-X, 0-7695-1606-8, 0-7695-1607-6",
ISBN-13 = "978-0-7695-1605-9, 978-0-7695-1606-6,
978-0-7695-1607-3",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.9.A73 S97 2002",
bibdate = "Fri May 12 12:36:48 MDT 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib;
z3950.bibsys.no:2100/BIBSYS",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=545215",
acknowledgement = ack-nhfb,
remark = "ISCA '02 proceedings.",
}
%%% Proceedings volume of the 30th Annual ISCA (San Diego, California,
%%% June 9--11, 2003), issued as Computer Architecture News 31(2).
@Proceedings{IEEE:2003:PAI,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings: 30th Annual International Symposium on
                     Computer Architecture: San Diego, California, USA, June
                     9--11, 2003: ISCA '03}},
  title           = {{Proceedings: 30th Annual International Symposium on
                     Computer Architecture: San Diego, California, USA, June
                     9--11, 2003: ISCA '03}},
  volume          = {31(2)},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xi + 448},
  year            = {2003},
  CODEN           = {CANED2},
  ISBN            = {0-7695-1945-8},
  ISBN-13         = {978-0-7695-1945-6},
  ISSN            = {0163-5964 (ACM), 0884-7495 (IEEE)},
  ISSN-L          = {0163-5964},
  LCCN            = {QA76 .S93 2002},
  bibdate         = {Fri May 12 12:35:09 2006},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  series          = j-COMP-ARCH-NEWS,
  URL             = {http://portal.acm.org/toc.cfm?id=859618},
  acknowledgement = ack-nhfb,
}
%%% Proceedings volume of the 31st Annual ISCA (Muenchen, Germany,
%%% June 19--23, 2004), issued as Computer Architecture News 32(2).
@Proceedings{ACM:2004:PAI,
editor = "{ACM}",
booktitle = "{Proceedings: 31st Annual International Symposium on
Computer Architecture: ISCA 2004: [June 19--23, 2004,
M{\"u}nchen, Germany]}",
title = "{Proceedings: 31st Annual International Symposium on
Computer Architecture: ISCA 2004: [June 19--23, 2004,
M{\"u}nchen, Germany]}",
volume = "32(2)",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xiv + 388",
year = "2004",
CODEN = "CANED2",
ISBN = "0-7695-2143-6",
ISBN-13 = "978-0-7695-2143-5",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "QA76.5 .S84 2004",
bibdate = "Fri May 12 12:32:28 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "Includes CD-ROM.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=998680",
acknowledgement = ack-nhfb,
remark = "ISCA '04 Proceedings",
}
%%% Proceedings volume of the 32nd ISCA (Madison, Wisconsin, June 4--8,
%%% 2005), issued as Computer Architecture News 33(2).  The LCCN is still
%%% unknown and keeps the "????" placeholder.
@Proceedings{IEEE:2005:ISC,
editor = "{IEEE}",
booktitle = "{32nd International Symposium on Computer
Architecture: proceedings, Madison, Wisconsin, June
4--8, 2005}",
title = "{32nd International Symposium on Computer
Architecture: proceedings, Madison, Wisconsin, June
4--8, 2005}",
volume = "33(2)",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xviii + 557",
year = "2005",
CODEN = "CANED2",
ISBN = "0-7695-2270-X",
ISBN-13 = "978-0-7695-2270-8",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "????",
bibdate = "Fri May 12 13:31:22 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "Includes CD-ROM.",
series = j-COMP-ARCH-NEWS,
URL = "http://portal.acm.org/toc.cfm?id=1069807",
acknowledgement = ack-nhfb,
remark = "ISCA '05 Proceedings",
}
%%% Proceedings volume of the 33rd ISCA (Boston, MA, June 17--21, 2006),
%%% issued as Computer Architecture News 34(2).  Page count and LCCN are
%%% still unknown and keep the "????" placeholder.
@Proceedings{IEEE:2006:ISC,
editor = "{IEEE}",
booktitle = "{33rd International Symposium on Computer
Architecture: proceedings, Boston, MA, USA, June
17--21, 2006}",
title = "{33rd International Symposium on Computer
Architecture: proceedings, Boston, MA, USA, June
17--21, 2006}",
volume = "34(2)",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "????",
year = "2006",
CODEN = "CANED2",
ISBN = "0-7695-2608-X",
ISBN-13 = "978-0-7695-2608-9",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
LCCN = "????",
bibdate = "Fri May 12 13:31:22 2006",
bibsource = "https://www.math.utah.edu/pub/tex/bib/sigarch.bib",
series = j-COMP-ARCH-NEWS,
URL = "http://www.ece.neu.edu/conf/isca2006/",
acknowledgement = ack-nhfb,
remark = "ISCA '06 Proceedings",
}